diff --git a/contrib/llvm-project/clang/include/clang/AST/ExprConcepts.h b/contrib/llvm-project/clang/include/clang/AST/ExprConcepts.h
index f02c140c14c1..746a5b2fbfc6 100644
--- a/contrib/llvm-project/clang/include/clang/AST/ExprConcepts.h
+++ b/contrib/llvm-project/clang/include/clang/AST/ExprConcepts.h
@@ -1,565 +1,573 @@
//===- ExprConcepts.h - C++2a Concepts expressions --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines Expressions and AST nodes for C++2a concepts.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_EXPRCONCEPTS_H
#define LLVM_CLANG_AST_EXPRCONCEPTS_H
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTConcept.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/Type.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TrailingObjects.h"
#include <utility>
#include <string>
namespace clang {
class ASTStmtReader;
class ASTStmtWriter;
/// \brief Represents the specialization of a concept - evaluates to a prvalue
/// of type bool.
///
/// According to C++2a [expr.prim.id]p3 an id-expression that denotes the
/// specialization of a concept results in a prvalue of type bool.
class ConceptSpecializationExpr final : public Expr, public ConceptReference {
friend class ASTReader;
friend class ASTStmtReader;
public:
using SubstitutionDiagnostic = std::pair<SourceLocation, std::string>;
protected:
/// \brief The Implicit Concept Specialization Decl, which holds the template
/// arguments for this specialization.
ImplicitConceptSpecializationDecl *SpecDecl;
/// \brief Information about the satisfaction of the named concept with the
/// given arguments. If this expression is value dependent, this is to be
/// ignored.
ASTConstraintSatisfaction *Satisfaction;
ConceptSpecializationExpr(const ASTContext &C, NestedNameSpecifierLoc NNS,
SourceLocation TemplateKWLoc,
DeclarationNameInfo ConceptNameInfo,
NamedDecl *FoundDecl, ConceptDecl *NamedConcept,
const ASTTemplateArgumentListInfo *ArgsAsWritten,
ImplicitConceptSpecializationDecl *SpecDecl,
const ConstraintSatisfaction *Satisfaction);
ConceptSpecializationExpr(const ASTContext &C, ConceptDecl *NamedConcept,
+ const ASTTemplateArgumentListInfo *ArgsAsWritten,
ImplicitConceptSpecializationDecl *SpecDecl,
const ConstraintSatisfaction *Satisfaction,
bool Dependent,
bool ContainsUnexpandedParameterPack);
ConceptSpecializationExpr(EmptyShell Empty);
public:
static ConceptSpecializationExpr *
Create(const ASTContext &C, NestedNameSpecifierLoc NNS,
SourceLocation TemplateKWLoc, DeclarationNameInfo ConceptNameInfo,
NamedDecl *FoundDecl, ConceptDecl *NamedConcept,
const ASTTemplateArgumentListInfo *ArgsAsWritten,
ImplicitConceptSpecializationDecl *SpecDecl,
const ConstraintSatisfaction *Satisfaction);
static ConceptSpecializationExpr *
Create(const ASTContext &C, ConceptDecl *NamedConcept,
ImplicitConceptSpecializationDecl *SpecDecl,
const ConstraintSatisfaction *Satisfaction, bool Dependent,
bool ContainsUnexpandedParameterPack);
+ static ConceptSpecializationExpr *
+ Create(const ASTContext &C, ConceptDecl *NamedConcept,
+ const ASTTemplateArgumentListInfo *ArgsAsWritten,
+ ImplicitConceptSpecializationDecl *SpecDecl,
+ const ConstraintSatisfaction *Satisfaction, bool Dependent,
+ bool ContainsUnexpandedParameterPack);
+
ArrayRef<TemplateArgument> getTemplateArguments() const {
return SpecDecl->getTemplateArguments();
}
const ImplicitConceptSpecializationDecl *getSpecializationDecl() const {
assert(SpecDecl && "Template Argument Decl not initialized");
return SpecDecl;
}
/// \brief Whether or not the concept with the given arguments was satisfied
/// when the expression was created.
/// The expression must not be dependent.
bool isSatisfied() const {
assert(!isValueDependent() &&
"isSatisfied called on a dependent ConceptSpecializationExpr");
return Satisfaction->IsSatisfied;
}
/// \brief Get elaborated satisfaction info about the template arguments'
/// satisfaction of the named concept.
/// The expression must not be dependent.
const ASTConstraintSatisfaction &getSatisfaction() const {
assert(!isValueDependent() &&
"getSatisfaction called on dependent ConceptSpecializationExpr");
return *Satisfaction;
}
static bool classof(const Stmt *T) {
return T->getStmtClass() == ConceptSpecializationExprClass;
}
SourceLocation getBeginLoc() const LLVM_READONLY {
if (auto QualifierLoc = getNestedNameSpecifierLoc())
return QualifierLoc.getBeginLoc();
return ConceptName.getBeginLoc();
}
SourceLocation getEndLoc() const LLVM_READONLY {
// If the ConceptSpecializationExpr is the ImmediatelyDeclaredConstraint
// of a TypeConstraint written syntactically as a constrained-parameter,
// there may not be a template argument list.
return ArgsAsWritten->RAngleLoc.isValid() ? ArgsAsWritten->RAngleLoc
: ConceptName.getEndLoc();
}
// Iterators
child_range children() {
return child_range(child_iterator(), child_iterator());
}
const_child_range children() const {
return const_child_range(const_child_iterator(), const_child_iterator());
}
};
namespace concepts {
/// \brief A static requirement that can be used in a requires-expression to
/// check properties of types and expressions.
class Requirement {
public:
// Note - simple and compound requirements are both represented by the same
// class (ExprRequirement).
enum RequirementKind { RK_Type, RK_Simple, RK_Compound, RK_Nested };
private:
const RequirementKind Kind;
// FIXME: use RequirementDependence to model dependence?
bool Dependent : 1;
bool ContainsUnexpandedParameterPack : 1;
bool Satisfied : 1;
public:
struct SubstitutionDiagnostic {
StringRef SubstitutedEntity;
// FIXME: Store diagnostics semantically and not as prerendered strings.
// Fixing this probably requires serialization of PartialDiagnostic
// objects.
SourceLocation DiagLoc;
StringRef DiagMessage;
};
Requirement(RequirementKind Kind, bool IsDependent,
bool ContainsUnexpandedParameterPack, bool IsSatisfied = true) :
Kind(Kind), Dependent(IsDependent),
ContainsUnexpandedParameterPack(ContainsUnexpandedParameterPack),
Satisfied(IsSatisfied) {}
RequirementKind getKind() const { return Kind; }
bool isSatisfied() const {
assert(!Dependent &&
"isSatisfied can only be called on non-dependent requirements.");
return Satisfied;
}
void setSatisfied(bool IsSatisfied) {
assert(!Dependent &&
"setSatisfied can only be called on non-dependent requirements.");
Satisfied = IsSatisfied;
}
void setDependent(bool IsDependent) { Dependent = IsDependent; }
bool isDependent() const { return Dependent; }
void setContainsUnexpandedParameterPack(bool Contains) {
ContainsUnexpandedParameterPack = Contains;
}
bool containsUnexpandedParameterPack() const {
return ContainsUnexpandedParameterPack;
}
};
/// \brief A requires-expression requirement which queries the existence of a
/// type name or type template specialization ('type' requirements).
class TypeRequirement : public Requirement {
public:
enum SatisfactionStatus {
SS_Dependent,
SS_SubstitutionFailure,
SS_Satisfied
};
private:
llvm::PointerUnion<SubstitutionDiagnostic *, TypeSourceInfo *> Value;
SatisfactionStatus Status;
public:
friend ASTStmtReader;
friend ASTStmtWriter;
/// \brief Construct a type requirement from a type. If the given type is not
/// dependent, this indicates that the type exists and the requirement will be
/// satisfied. Otherwise, the SubstitutionDiagnostic constructor is to be
/// used.
TypeRequirement(TypeSourceInfo *T);
/// \brief Construct a type requirement when the nested name specifier is
/// invalid due to a bad substitution. The requirement is unsatisfied.
TypeRequirement(SubstitutionDiagnostic *Diagnostic) :
Requirement(RK_Type, false, false, false), Value(Diagnostic),
Status(SS_SubstitutionFailure) {}
SatisfactionStatus getSatisfactionStatus() const { return Status; }
void setSatisfactionStatus(SatisfactionStatus Status) {
this->Status = Status;
}
bool isSubstitutionFailure() const {
return Status == SS_SubstitutionFailure;
}
SubstitutionDiagnostic *getSubstitutionDiagnostic() const {
assert(Status == SS_SubstitutionFailure &&
"Attempted to get substitution diagnostic when there has been no "
"substitution failure.");
return Value.get<SubstitutionDiagnostic *>();
}
TypeSourceInfo *getType() const {
assert(!isSubstitutionFailure() &&
"Attempted to get type when there has been a substitution failure.");
return Value.get<TypeSourceInfo *>();
}
static bool classof(const Requirement *R) {
return R->getKind() == RK_Type;
}
};
/// \brief A requires-expression requirement which queries the validity and
/// properties of an expression ('simple' and 'compound' requirements).
class ExprRequirement : public Requirement {
public:
enum SatisfactionStatus {
SS_Dependent,
SS_ExprSubstitutionFailure,
SS_NoexceptNotMet,
SS_TypeRequirementSubstitutionFailure,
SS_ConstraintsNotSatisfied,
SS_Satisfied
};
class ReturnTypeRequirement {
llvm::PointerIntPair<
llvm::PointerUnion<TemplateParameterList *, SubstitutionDiagnostic *>,
1, bool>
TypeConstraintInfo;
public:
friend ASTStmtReader;
friend ASTStmtWriter;
/// \brief No return type requirement was specified.
ReturnTypeRequirement() : TypeConstraintInfo(nullptr, false) {}
/// \brief A return type requirement was specified but it was a
/// substitution failure.
ReturnTypeRequirement(SubstitutionDiagnostic *SubstDiag) :
TypeConstraintInfo(SubstDiag, false) {}
/// \brief A 'type constraint' style return type requirement.
/// \param TPL an invented template parameter list containing a single
/// type parameter with a type-constraint.
// TODO: Can we maybe not save the whole template parameter list and just
// the type constraint? Saving the whole TPL makes it easier to handle in
// serialization but is less elegant.
ReturnTypeRequirement(TemplateParameterList *TPL);
bool isDependent() const {
return TypeConstraintInfo.getInt();
}
bool containsUnexpandedParameterPack() const {
if (!isTypeConstraint())
return false;
return getTypeConstraintTemplateParameterList()
->containsUnexpandedParameterPack();
}
bool isEmpty() const {
return TypeConstraintInfo.getPointer().isNull();
}
bool isSubstitutionFailure() const {
return !isEmpty() &&
TypeConstraintInfo.getPointer().is<SubstitutionDiagnostic *>();
}
bool isTypeConstraint() const {
return !isEmpty() &&
TypeConstraintInfo.getPointer().is<TemplateParameterList *>();
}
SubstitutionDiagnostic *getSubstitutionDiagnostic() const {
assert(isSubstitutionFailure());
return TypeConstraintInfo.getPointer().get<SubstitutionDiagnostic *>();
}
const TypeConstraint *getTypeConstraint() const;
TemplateParameterList *getTypeConstraintTemplateParameterList() const {
assert(isTypeConstraint());
return TypeConstraintInfo.getPointer().get<TemplateParameterList *>();
}
};
private:
llvm::PointerUnion<Expr *, SubstitutionDiagnostic *> Value;
SourceLocation NoexceptLoc; // May be empty if noexcept wasn't specified.
ReturnTypeRequirement TypeReq;
ConceptSpecializationExpr *SubstitutedConstraintExpr;
SatisfactionStatus Status;
public:
friend ASTStmtReader;
friend ASTStmtWriter;
/// \brief Construct a compound requirement.
/// \param E the expression which is checked by this requirement.
/// \param IsSimple whether this was a simple requirement in source.
/// \param NoexceptLoc the location of the noexcept keyword, if it was
/// specified, otherwise an empty location.
/// \param Req the requirement for the type of the checked expression.
/// \param Status the satisfaction status of this requirement.
ExprRequirement(
Expr *E, bool IsSimple, SourceLocation NoexceptLoc,
ReturnTypeRequirement Req, SatisfactionStatus Status,
ConceptSpecializationExpr *SubstitutedConstraintExpr = nullptr);
/// \brief Construct a compound requirement whose expression was a
/// substitution failure. The requirement is not satisfied.
/// \param E the diagnostic emitted while instantiating the original
/// expression.
/// \param IsSimple whether this was a simple requirement in source.
/// \param NoexceptLoc the location of the noexcept keyword, if it was
/// specified, otherwise an empty location.
/// \param Req the requirement for the type of the checked expression (omit
/// if no requirement was specified).
ExprRequirement(SubstitutionDiagnostic *E, bool IsSimple,
SourceLocation NoexceptLoc, ReturnTypeRequirement Req = {});
bool isSimple() const { return getKind() == RK_Simple; }
bool isCompound() const { return getKind() == RK_Compound; }
bool hasNoexceptRequirement() const { return NoexceptLoc.isValid(); }
SourceLocation getNoexceptLoc() const { return NoexceptLoc; }
SatisfactionStatus getSatisfactionStatus() const { return Status; }
bool isExprSubstitutionFailure() const {
return Status == SS_ExprSubstitutionFailure;
}
const ReturnTypeRequirement &getReturnTypeRequirement() const {
return TypeReq;
}
ConceptSpecializationExpr *
getReturnTypeRequirementSubstitutedConstraintExpr() const {
assert(Status >= SS_TypeRequirementSubstitutionFailure);
return SubstitutedConstraintExpr;
}
SubstitutionDiagnostic *getExprSubstitutionDiagnostic() const {
assert(isExprSubstitutionFailure() &&
"Attempted to get expression substitution diagnostic when there has "
"been no expression substitution failure");
return Value.get<SubstitutionDiagnostic *>();
}
Expr *getExpr() const {
assert(!isExprSubstitutionFailure() &&
"ExprRequirement has no expression because there has been a "
"substitution failure.");
return Value.get<Expr *>();
}
static bool classof(const Requirement *R) {
return R->getKind() == RK_Compound || R->getKind() == RK_Simple;
}
};
/// \brief A requires-expression requirement which is satisfied when a general
/// constraint expression is satisfied ('nested' requirements).
class NestedRequirement : public Requirement {
Expr *Constraint = nullptr;
const ASTConstraintSatisfaction *Satisfaction = nullptr;
bool HasInvalidConstraint = false;
StringRef InvalidConstraintEntity;
public:
friend ASTStmtReader;
friend ASTStmtWriter;
NestedRequirement(Expr *Constraint)
: Requirement(RK_Nested, /*IsDependent=*/true,
Constraint->containsUnexpandedParameterPack()),
Constraint(Constraint) {
assert(Constraint->isInstantiationDependent() &&
"Nested requirement with non-dependent constraint must be "
"constructed with a ConstraintSatisfaction object");
}
NestedRequirement(ASTContext &C, Expr *Constraint,
const ConstraintSatisfaction &Satisfaction)
: Requirement(RK_Nested, Constraint->isInstantiationDependent(),
Constraint->containsUnexpandedParameterPack(),
Satisfaction.IsSatisfied),
Constraint(Constraint),
Satisfaction(ASTConstraintSatisfaction::Create(C, Satisfaction)) {}
NestedRequirement(StringRef InvalidConstraintEntity,
const ASTConstraintSatisfaction *Satisfaction)
: Requirement(RK_Nested,
/*IsDependent=*/false,
/*ContainsUnexpandedParameterPack*/ false,
Satisfaction->IsSatisfied),
Satisfaction(Satisfaction), HasInvalidConstraint(true),
InvalidConstraintEntity(InvalidConstraintEntity) {}
NestedRequirement(ASTContext &C, StringRef InvalidConstraintEntity,
const ConstraintSatisfaction &Satisfaction)
: NestedRequirement(InvalidConstraintEntity,
ASTConstraintSatisfaction::Create(C, Satisfaction)) {}
bool hasInvalidConstraint() const { return HasInvalidConstraint; }
StringRef getInvalidConstraintEntity() {
assert(hasInvalidConstraint());
return InvalidConstraintEntity;
}
Expr *getConstraintExpr() const {
assert(!hasInvalidConstraint() &&
"getConstraintExpr() may not be called "
"on nested requirements with invalid constraint.");
return Constraint;
}
const ASTConstraintSatisfaction &getConstraintSatisfaction() const {
return *Satisfaction;
}
static bool classof(const Requirement *R) {
return R->getKind() == RK_Nested;
}
};
} // namespace concepts
/// C++2a [expr.prim.req]:
/// A requires-expression provides a concise way to express requirements on
/// template arguments. A requirement is one that can be checked by name
/// lookup (6.4) or by checking properties of types and expressions.
/// [...]
/// A requires-expression is a prvalue of type bool [...]
class RequiresExpr final : public Expr,
llvm::TrailingObjects<RequiresExpr, ParmVarDecl *,
concepts::Requirement *> {
friend TrailingObjects;
friend class ASTStmtReader;
unsigned NumLocalParameters;
unsigned NumRequirements;
RequiresExprBodyDecl *Body;
SourceLocation RBraceLoc;
unsigned numTrailingObjects(OverloadToken<ParmVarDecl *>) const {
return NumLocalParameters;
}
unsigned numTrailingObjects(OverloadToken<concepts::Requirement *>) const {
return NumRequirements;
}
RequiresExpr(ASTContext &C, SourceLocation RequiresKWLoc,
RequiresExprBodyDecl *Body,
ArrayRef<ParmVarDecl *> LocalParameters,
ArrayRef<concepts::Requirement *> Requirements,
SourceLocation RBraceLoc);
RequiresExpr(ASTContext &C, EmptyShell Empty, unsigned NumLocalParameters,
unsigned NumRequirements);
public:
static RequiresExpr *
Create(ASTContext &C, SourceLocation RequiresKWLoc,
RequiresExprBodyDecl *Body, ArrayRef<ParmVarDecl *> LocalParameters,
ArrayRef<concepts::Requirement *> Requirements,
SourceLocation RBraceLoc);
static RequiresExpr *
Create(ASTContext &C, EmptyShell Empty, unsigned NumLocalParameters,
unsigned NumRequirements);
ArrayRef<ParmVarDecl *> getLocalParameters() const {
return {getTrailingObjects<ParmVarDecl *>(), NumLocalParameters};
}
RequiresExprBodyDecl *getBody() const { return Body; }
ArrayRef<concepts::Requirement *> getRequirements() const {
return {getTrailingObjects<concepts::Requirement *>(), NumRequirements};
}
/// \brief Whether or not the requires clause is satisfied.
/// The expression must not be dependent.
bool isSatisfied() const {
assert(!isValueDependent()
&& "isSatisfied called on a dependent RequiresExpr");
return RequiresExprBits.IsSatisfied;
}
void setSatisfied(bool IsSatisfied) {
assert(!isValueDependent() &&
"setSatisfied called on a dependent RequiresExpr");
RequiresExprBits.IsSatisfied = IsSatisfied;
}
SourceLocation getRequiresKWLoc() const {
return RequiresExprBits.RequiresKWLoc;
}
SourceLocation getRBraceLoc() const { return RBraceLoc; }
static bool classof(const Stmt *T) {
return T->getStmtClass() == RequiresExprClass;
}
SourceLocation getBeginLoc() const LLVM_READONLY {
return RequiresExprBits.RequiresKWLoc;
}
SourceLocation getEndLoc() const LLVM_READONLY {
return RBraceLoc;
}
// Iterators
child_range children() {
return child_range(child_iterator(), child_iterator());
}
const_child_range children() const {
return const_child_range(const_child_iterator(), const_child_iterator());
}
};
} // namespace clang
#endif // LLVM_CLANG_AST_EXPRCONCEPTS_H
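
For orientation only (not part of the patch): the AST nodes declared above model C++20 concepts constructs in source code. Below is a minimal illustrative sketch, assuming a C++20 compiler; the names Addable, a, and b are hypothetical and do not appear anywhere in this diff. The use of the concept name forms a ConceptSpecializationExpr (a bool prvalue), and the requires-expression is what RequiresExpr and the concepts::Requirement subclasses represent.

    // Illustrative only; not part of the diff.
    #include <concepts>

    template <typename T>
    concept Addable = requires(T a, T b) {        // RequiresExpr with local parameters a, b
      typename T::value_type;                     // type requirement   -> concepts::TypeRequirement
      a + b;                                      // simple requirement -> concepts::ExprRequirement
      { a + b } noexcept -> std::same_as<T>;      // compound requirement with noexcept and a
                                                  // return-type-requirement (type constraint)
      requires std::copyable<T>;                  // nested requirement -> concepts::NestedRequirement
    };

    // The id-expression Addable<int> is a ConceptSpecializationExpr; per
    // C++2a [expr.prim.id]p3 it is a prvalue of type bool. Here it is false
    // because int has no member value_type.
    static_assert(!Addable<int>);
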
diff --git a/contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 31a27558bca4..e4651678603d 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1,11796 +1,11797 @@
//==--- DiagnosticSemaKinds.td - libsema diagnostics ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Semantic Analysis
//===----------------------------------------------------------------------===//
let Component = "Sema" in {
let CategoryName = "Semantic Issue" in {
def note_previous_decl : Note<"%0 declared here">;
def note_entity_declared_at : Note<"%0 declared here">;
def note_callee_decl : Note<"%0 declared here">;
def note_defined_here : Note<"%0 defined here">;
// For loop analysis
def warn_variables_not_in_loop_body : Warning<
"variable%select{s| %1|s %1 and %2|s %1, %2, and %3|s %1, %2, %3, and %4}0 "
"used in loop condition not modified in loop body">,
InGroup<ForLoopAnalysis>, DefaultIgnore;
def warn_redundant_loop_iteration : Warning<
"variable %0 is %select{decremented|incremented}1 both in the loop header "
"and in the loop body">,
InGroup<ForLoopAnalysis>, DefaultIgnore;
def note_loop_iteration_here : Note<"%select{decremented|incremented}0 here">;
def warn_duplicate_enum_values : Warning<
"element %0 has been implicitly assigned %1 which another element has "
"been assigned">, InGroup<DiagGroup<"duplicate-enum">>, DefaultIgnore;
def note_duplicate_element : Note<"element %0 also has value %1">;
// Absolute value functions
def warn_unsigned_abs : Warning<
"taking the absolute value of unsigned type %0 has no effect">,
InGroup<AbsoluteValue>;
def note_remove_abs : Note<
"remove the call to '%0' since unsigned values cannot be negative">;
def warn_abs_too_small : Warning<
"absolute value function %0 given an argument of type %1 but has parameter "
"of type %2 which may cause truncation of value">, InGroup<AbsoluteValue>;
def warn_wrong_absolute_value_type : Warning<
"using %select{integer|floating point|complex}1 absolute value function %0 "
"when argument is of %select{integer|floating point|complex}2 type">,
InGroup<AbsoluteValue>;
def note_replace_abs_function : Note<"use function '%0' instead">;
def warn_pointer_abs : Warning<
"taking the absolute value of %select{pointer|function|array}0 type %1 is suspicious">,
InGroup<AbsoluteValue>;
def warn_max_unsigned_zero : Warning<
"taking the max of "
"%select{a value and unsigned zero|unsigned zero and a value}0 "
"is always equal to the other value">,
InGroup<MaxUnsignedZero>;
def note_remove_max_call : Note<
"remove call to max function and unsigned zero argument">;
def warn_infinite_recursive_function : Warning<
"all paths through this function will call itself">,
InGroup<InfiniteRecursion>, DefaultIgnore;
def warn_comma_operator : Warning<"possible misuse of comma operator here">,
InGroup<DiagGroup<"comma">>, DefaultIgnore;
def note_cast_to_void : Note<"cast expression to void to silence warning">;
def note_cast_operand_to_int : Note<"cast one or both operands to int to silence this warning">;
// Constant expressions
def err_expr_not_ice : Error<
"expression is not an %select{integer|integral}0 constant expression">;
def ext_expr_not_ice : Extension<
"expression is not an %select{integer|integral}0 constant expression; "
"folding it to a constant is a GNU extension">, InGroup<GNUFoldingConstant>;
def err_typecheck_converted_constant_expression : Error<
"value of type %0 is not implicitly convertible to %1">;
def err_typecheck_converted_constant_expression_disallowed : Error<
"conversion from %0 to %1 is not allowed in a converted constant expression">;
def err_typecheck_converted_constant_expression_indirect : Error<
"conversion from %0 to %1 in converted constant expression would "
"bind reference to a temporary">;
def err_expr_not_cce : Error<
"%select{case value|enumerator value|non-type template argument|"
"array size|explicit specifier argument|noexcept specifier argument}0 "
"is not a constant expression">;
def ext_cce_narrowing : ExtWarn<
"%select{case value|enumerator value|non-type template argument|"
"array size|explicit specifier argument|noexcept specifier argument}0 "
"%select{cannot be narrowed from type %2 to %3|"
"evaluates to %2, which cannot be narrowed to type %3}1">,
InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure;
def err_ice_not_integral : Error<
"%select{integer|integral}1 constant expression must have "
"%select{integer|integral or unscoped enumeration}1 type, not %0">;
def err_ice_incomplete_type : Error<
"integral constant expression has incomplete class type %0">;
def err_ice_explicit_conversion : Error<
"integral constant expression requires explicit conversion from %0 to %1">;
def note_ice_conversion_here : Note<
"conversion to %select{integral|enumeration}0 type %1 declared here">;
def err_ice_ambiguous_conversion : Error<
"ambiguous conversion from type %0 to an integral or unscoped "
"enumeration type">;
def err_ice_too_large : Error<
"integer constant expression evaluates to value %0 that cannot be "
"represented in a %1-bit %select{signed|unsigned}2 integer type">;
def err_expr_not_string_literal : Error<"expression is not a string literal">;
// Semantic analysis of constant literals.
def ext_predef_outside_function : Warning<
"predefined identifier is only valid inside function">,
InGroup<DiagGroup<"predefined-identifier-outside-function">>;
def warn_float_overflow : Warning<
"magnitude of floating-point constant too large for type %0; maximum is %1">,
InGroup<LiteralRange>;
def warn_float_underflow : Warning<
"magnitude of floating-point constant too small for type %0; minimum is %1">,
InGroup<LiteralRange>;
def warn_float_compare_literal : Warning<
"floating-point comparison is always %select{true|false}0; "
"constant cannot be represented exactly in type %1">,
InGroup<LiteralRange>;
def warn_double_const_requires_fp64 : Warning<
"double precision constant requires %select{cl_khr_fp64|cl_khr_fp64 and __opencl_c_fp64}0, "
"casting to single precision">;
def err_half_const_requires_fp16 : Error<
"half precision constant requires cl_khr_fp16">;
// C99 variable-length arrays
def ext_vla : Extension<"variable length arrays are a C99 feature">,
InGroup<VLAExtension>;
def warn_vla_used : Warning<"variable length array used">,
InGroup<VLA>, DefaultIgnore;
def err_vla_in_sfinae : Error<
"variable length array cannot be formed during template argument deduction">;
def err_array_star_in_function_definition : Error<
"variable length array must be bound in function definition">;
def err_vla_decl_in_file_scope : Error<
"variable length array declaration not allowed at file scope">;
def err_vla_decl_has_static_storage : Error<
"variable length array declaration cannot have 'static' storage duration">;
def err_vla_decl_has_extern_linkage : Error<
"variable length array declaration cannot have 'extern' linkage">;
def ext_vla_folded_to_constant : ExtWarn<
"variable length array folded to constant array as an extension">,
InGroup<GNUFoldingConstant>;
def err_vla_unsupported : Error<
"variable length arrays are not supported for %select{the current target|'%1'}0">;
def note_vla_unsupported : Note<
"variable length arrays are not supported for the current target">;
// C99 variably modified types
def err_variably_modified_template_arg : Error<
"variably modified type %0 cannot be used as a template argument">;
def err_variably_modified_nontype_template_param : Error<
"non-type template parameter of variably modified type %0">;
def err_variably_modified_new_type : Error<
"'new' cannot allocate object of variably modified type %0">;
// C99 Designated Initializers
def ext_designated_init : Extension<
"designated initializers are a C99 feature">, InGroup<C99Designator>;
def err_array_designator_negative : Error<
"array designator value '%0' is negative">;
def err_array_designator_empty_range : Error<
"array designator range [%0, %1] is empty">;
def err_array_designator_non_array : Error<
"array designator cannot initialize non-array type %0">;
def err_array_designator_too_large : Error<
"array designator index (%0) exceeds array bounds (%1)">;
def err_field_designator_non_aggr : Error<
"field designator cannot initialize a "
"%select{non-struct, non-union|non-class}0 type %1">;
def err_field_designator_unknown : Error<
"field designator %0 does not refer to any field in type %1">;
def err_field_designator_nonfield : Error<
"field designator %0 does not refer to a non-static data member">;
def note_field_designator_found : Note<"field designator refers here">;
def err_designator_for_scalar_or_sizeless_init : Error<
"designator in initializer for %select{scalar|indivisible sizeless}0 "
"type %1">;
def warn_initializer_overrides : Warning<
"initializer %select{partially |}0overrides prior initialization of "
"this subobject">, InGroup<InitializerOverrides>;
def ext_initializer_overrides : ExtWarn<warn_initializer_overrides.Summary>,
InGroup<InitializerOverrides>, SFINAEFailure;
def err_initializer_overrides_destructed : Error<
"initializer would partially override prior initialization of object of "
"type %1 with non-trivial destruction">;
def note_previous_initializer : Note<
"previous initialization %select{|with side effects }0is here"
"%select{| (side effects will not occur at run time)}0">;
def err_designator_into_flexible_array_member : Error<
"designator into flexible array member subobject">;
def note_flexible_array_member : Note<
"initialized flexible array member %0 is here">;
def ext_flexible_array_init : Extension<
"flexible array initialization is a GNU extension">, InGroup<GNUFlexibleArrayInitializer>;
// C++20 designated initializers
def ext_cxx_designated_init : Extension<
"designated initializers are a C++20 extension">, InGroup<CXX20Designator>,
SuppressInSystemMacro;
def warn_cxx17_compat_designated_init : Warning<
"designated initializers are incompatible with C++ standards before C++20">,
InGroup<CXXPre20CompatPedantic>, DefaultIgnore;
def ext_designated_init_mixed : ExtWarn<
"mixture of designated and non-designated initializers in the same "
"initializer list is a C99 extension">, InGroup<C99Designator>;
def note_designated_init_mixed : Note<
"first non-designated initializer is here">;
def ext_designated_init_array : ExtWarn<
"array designators are a C99 extension">, InGroup<C99Designator>;
def ext_designated_init_nested : ExtWarn<
"nested designators are a C99 extension">, InGroup<C99Designator>;
def ext_designated_init_reordered : ExtWarn<
"ISO C++ requires field designators to be specified in declaration order; "
"field %1 will be initialized after field %0">, InGroup<ReorderInitList>,
SFINAEFailure;
def note_previous_field_init : Note<
"previous initialization for field %0 is here">;
def ext_designated_init_brace_elision : ExtWarn<
"brace elision for designated initializer is a C99 extension">,
InGroup<C99Designator>, SFINAEFailure;
// Declarations.
def ext_plain_complex : ExtWarn<
"plain '_Complex' requires a type specifier; assuming '_Complex double'">;
def ext_imaginary_constant : Extension<
"imaginary constants are a GNU extension">, InGroup<GNUImaginaryConstant>;
def ext_integer_complex : Extension<
"complex integer types are a GNU extension">, InGroup<GNUComplexInteger>;
def err_invalid_saturation_spec : Error<"'_Sat' specifier is only valid on "
"'_Fract' or '_Accum', not '%0'">;
def err_invalid_sign_spec : Error<"'%0' cannot be signed or unsigned">;
def err_invalid_width_spec : Error<
"'%select{|short|long|long long}0 %1' is invalid">;
def err_invalid_complex_spec : Error<"'_Complex %0' is invalid">;
def ext_auto_type_specifier : ExtWarn<
"'auto' type specifier is a C++11 extension">, InGroup<CXX11>;
def warn_auto_storage_class : Warning<
"'auto' storage class specifier is redundant and incompatible with C++11">,
InGroup<CXX11Compat>, DefaultIgnore;
def warn_deprecated_register : Warning<
"'register' storage class specifier is deprecated "
"and incompatible with C++17">, InGroup<DeprecatedRegister>;
def ext_register_storage_class : ExtWarn<
"ISO C++17 does not allow 'register' storage class specifier">,
DefaultError, InGroup<Register>;
def err_invalid_decl_spec_combination : Error<
"cannot combine with previous '%0' declaration specifier">;
def err_invalid_vector_decl_spec_combination : Error<
"cannot combine with previous '%0' declaration specifier. "
"'__vector' must be first">;
def err_invalid_pixel_decl_spec_combination : Error<
"'__pixel' must be preceded by '__vector'. "
"'%0' declaration specifier not allowed here">;
def err_invalid_vector_bool_decl_spec : Error<
"cannot use '%0' with '__vector bool'">;
def err_invalid_vector_long_decl_spec : Error<
"cannot use 'long' with '__vector'">;
def err_invalid_vector_float_decl_spec : Error<
"cannot use 'float' with '__vector'">;
def err_invalid_vector_double_decl_spec : Error <
"use of 'double' with '__vector' requires VSX support to be enabled "
"(available on POWER7 or later)">;
def err_invalid_vector_bool_int128_decl_spec : Error <
"use of '__int128' with '__vector bool' requires VSX support enabled (on "
"POWER10 or later)">;
def err_invalid_vector_int128_decl_spec : Error<
"use of '__int128' with '__vector' requires extended Altivec support"
" (available on POWER8 or later)">;
def err_invalid_vector_long_long_decl_spec : Error <
"use of 'long long' with '__vector' requires VSX support (available on "
"POWER7 or later) to be enabled">;
def err_invalid_vector_long_double_decl_spec : Error<
"cannot use 'long double' with '__vector'">;
def warn_vector_long_decl_spec_combination : Warning<
"Use of 'long' with '__vector' is deprecated">, InGroup<Deprecated>;
def err_redeclaration_different_type : Error<
"redeclaration of %0 with a different type%diff{: $ vs $|}1,2">;
def err_bad_variable_name : Error<
"%0 cannot be the name of a variable or data member">;
def err_bad_parameter_name : Error<
"%0 cannot be the name of a parameter">;
def err_bad_parameter_name_template_id : Error<
"parameter name cannot have template arguments">;
def ext_parameter_name_omitted_c2x : ExtWarn<
"omitting the parameter name in a function definition is a C2x extension">,
InGroup<C2x>;
def err_anyx86_interrupt_attribute : Error<
"%select{x86|x86-64}0 'interrupt' attribute only applies to functions that "
"have %select{a 'void' return type|"
"only a pointer parameter optionally followed by an integer parameter|"
"a pointer as the first parameter|a %2 type as the second parameter}1">;
def err_anyx86_interrupt_called : Error<
"interrupt service routine cannot be called directly">;
def warn_anyx86_interrupt_regsave : Warning<
"interrupt service routine should only call a function"
" with attribute 'no_caller_saved_registers'">,
InGroup<DiagGroup<"interrupt-service-routine">>;
def warn_arm_interrupt_calling_convention : Warning<
"call to function without interrupt attribute could clobber interruptee's VFP registers">,
InGroup<Extra>;
def warn_interrupt_attribute_invalid : Warning<
"%select{MIPS|MSP430|RISC-V}0 'interrupt' attribute only applies to "
"functions that have %select{no parameters|a 'void' return type}1">,
InGroup<IgnoredAttributes>;
def warn_riscv_repeated_interrupt_attribute : Warning<
"repeated RISC-V 'interrupt' attribute">, InGroup<IgnoredAttributes>;
def note_riscv_repeated_interrupt_attribute : Note<
"repeated RISC-V 'interrupt' attribute is here">;
def warn_unused_parameter : Warning<"unused parameter %0">,
InGroup<UnusedParameter>, DefaultIgnore;
def warn_unused_but_set_parameter : Warning<"parameter %0 set but not used">,
InGroup<UnusedButSetParameter>, DefaultIgnore;
def warn_unused_variable : Warning<"unused variable %0">,
InGroup<UnusedVariable>, DefaultIgnore;
def warn_unused_but_set_variable : Warning<"variable %0 set but not used">,
InGroup<UnusedButSetVariable>, DefaultIgnore;
def warn_unused_local_typedef : Warning<
"unused %select{typedef|type alias}0 %1">,
InGroup<UnusedLocalTypedef>, DefaultIgnore;
def warn_unused_property_backing_ivar :
Warning<"ivar %0 which backs the property is not "
"referenced in this property's accessor">,
InGroup<UnusedPropertyIvar>, DefaultIgnore;
def warn_unused_const_variable : Warning<"unused variable %0">,
InGroup<UnusedConstVariable>, DefaultIgnore;
def warn_unused_exception_param : Warning<"unused exception parameter %0">,
InGroup<UnusedExceptionParameter>, DefaultIgnore;
def warn_decl_in_param_list : Warning<
"declaration of %0 will not be visible outside of this function">,
InGroup<Visibility>;
def warn_redefinition_in_param_list : Warning<
"redefinition of %0 will not be visible outside of this function">,
InGroup<Visibility>;
def warn_empty_parens_are_function_decl : Warning<
"empty parentheses interpreted as a function declaration">,
InGroup<VexingParse>;
def warn_parens_disambiguated_as_function_declaration : Warning<
"parentheses were disambiguated as a function declaration">,
InGroup<VexingParse>;
def warn_parens_disambiguated_as_variable_declaration : Warning<
"parentheses were disambiguated as redundant parentheses around declaration "
"of variable named %0">, InGroup<VexingParse>;
def warn_redundant_parens_around_declarator : Warning<
"redundant parentheses surrounding declarator">,
InGroup<DiagGroup<"redundant-parens">>, DefaultIgnore;
def note_additional_parens_for_variable_declaration : Note<
"add a pair of parentheses to declare a variable">;
def note_raii_guard_add_name : Note<
"add a variable name to declare a %0 initialized with %1">;
def note_function_style_cast_add_parentheses : Note<
"add enclosing parentheses to perform a function-style cast">;
def note_remove_parens_for_variable_declaration : Note<
"remove parentheses to silence this warning">;
def note_empty_parens_function_call : Note<
"change this ',' to a ';' to call %0">;
def note_empty_parens_default_ctor : Note<
"remove parentheses to declare a variable">;
def note_empty_parens_zero_initialize : Note<
"replace parentheses with an initializer to declare a variable">;
def warn_unused_function : Warning<"unused function %0">,
InGroup<UnusedFunction>, DefaultIgnore;
def warn_unused_template : Warning<"unused %select{function|variable}0 template %1">,
InGroup<UnusedTemplate>, DefaultIgnore;
def warn_unused_member_function : Warning<"unused member function %0">,
InGroup<UnusedMemberFunction>, DefaultIgnore;
def warn_used_but_marked_unused: Warning<"%0 was marked unused but was used">,
InGroup<UsedButMarkedUnused>, DefaultIgnore;
def warn_unneeded_internal_decl : Warning<
"%select{function|variable}0 %1 is not needed and will not be emitted">,
InGroup<UnneededInternalDecl>, DefaultIgnore;
def warn_unneeded_static_internal_decl : Warning<
"'static' function %0 declared in header file "
"should be declared 'static inline'">,
InGroup<UnneededInternalDecl>, DefaultIgnore;
def warn_unneeded_member_function : Warning<
"member function %0 is not needed and will not be emitted">,
InGroup<UnneededMemberFunction>, DefaultIgnore;
def warn_unused_private_field: Warning<"private field %0 is not used">,
InGroup<UnusedPrivateField>, DefaultIgnore;
def warn_unused_lambda_capture: Warning<"lambda capture %0 is not "
"%select{used|required to be captured for this use}1">,
InGroup<UnusedLambdaCapture>, DefaultIgnore;
def warn_reserved_extern_symbol: Warning<
"identifier %0 is reserved because %select{"
"<ERROR>|" // ReservedIdentifierStatus::NotReserved
"it starts with '_' at global scope|"
"it starts with '_' and has C language linkage|"
"it starts with '__'|"
"it starts with '_' followed by a capital letter|"
"it contains '__'}1">,
InGroup<ReservedIdentifier>, DefaultIgnore;
def warn_parameter_size: Warning<
"%0 is a large (%1 bytes) pass-by-value argument; "
"pass it by reference instead ?">, InGroup<LargeByValueCopy>;
def warn_return_value_size: Warning<
"return value of %0 is a large (%1 bytes) pass-by-value object; "
"pass it by reference instead ?">, InGroup<LargeByValueCopy>;
def warn_return_value_udt: Warning<
"%0 has C-linkage specified, but returns user-defined type %1 which is "
"incompatible with C">, InGroup<ReturnTypeCLinkage>;
def warn_return_value_udt_incomplete: Warning<
"%0 has C-linkage specified, but returns incomplete type %1 which could be "
"incompatible with C">, InGroup<ReturnTypeCLinkage>;
def warn_implicit_function_decl : Warning<
"implicit declaration of function %0">,
InGroup<ImplicitFunctionDeclare>, DefaultIgnore;
def ext_implicit_function_decl_c99 : ExtWarn<
"call to undeclared function %0; ISO C99 and later do not support implicit "
"function declarations">, InGroup<ImplicitFunctionDeclare>;
def note_function_suggestion : Note<"did you mean %0?">;
def err_ellipsis_first_param : Error<
"ISO C requires a named parameter before '...'">;
def err_declarator_need_ident : Error<"declarator requires an identifier">;
def err_language_linkage_spec_unknown : Error<"unknown linkage language">;
def err_language_linkage_spec_not_ascii : Error<
"string literal in language linkage specifier cannot have an "
"encoding-prefix">;
def ext_use_out_of_scope_declaration : ExtWarn<
"use of out-of-scope declaration of %0%select{| whose type is not "
"compatible with that of an implicit declaration}1">,
InGroup<DiagGroup<"out-of-scope-function">>;
def err_inline_non_function : Error<
"'inline' can only appear on functions%select{| and non-local variables}0">;
def err_noreturn_non_function : Error<
"'_Noreturn' can only appear on functions">;
def warn_qual_return_type : Warning<
"'%0' type qualifier%s1 on return type %plural{1:has|:have}1 no effect">,
InGroup<IgnoredQualifiers>, DefaultIgnore;
def warn_deprecated_redundant_constexpr_static_def : Warning<
"out-of-line definition of constexpr static data member is redundant "
"in C++17 and is deprecated">,
InGroup<Deprecated>, DefaultIgnore;
def warn_decl_shadow :
Warning<"declaration shadows a %select{"
"local variable|"
"variable in %2|"
"static data member of %2|"
"field of %2|"
"typedef in %2|"
"type alias in %2|"
"structured binding}1">,
InGroup<Shadow>, DefaultIgnore, SuppressInSystemMacro;
def warn_decl_shadow_uncaptured_local :
Warning<warn_decl_shadow.Summary>,
InGroup<ShadowUncapturedLocal>, DefaultIgnore;
def warn_ctor_parm_shadows_field:
Warning<"constructor parameter %0 shadows the field %1 of %2">,
InGroup<ShadowFieldInConstructor>, DefaultIgnore;
def warn_modifying_shadowing_decl :
Warning<"modifying constructor parameter %0 that shadows a "
"field of %1">,
InGroup<ShadowFieldInConstructorModified>, DefaultIgnore;
// C++ decomposition declarations
def err_decomp_decl_context : Error<
"decomposition declaration not permitted in this context">;
def warn_cxx14_compat_decomp_decl : Warning<
"decomposition declarations are incompatible with "
"C++ standards before C++17">, DefaultIgnore, InGroup<CXXPre17Compat>;
def ext_decomp_decl : ExtWarn<
"decomposition declarations are a C++17 extension">, InGroup<CXX17>;
def ext_decomp_decl_cond : ExtWarn<
"ISO C++17 does not permit structured binding declaration in a condition">,
InGroup<DiagGroup<"binding-in-condition">>;
def err_decomp_decl_spec : Error<
"decomposition declaration cannot be declared "
"%plural{1:'%1'|:with '%1' specifiers}0">;
def ext_decomp_decl_spec : ExtWarn<
"decomposition declaration declared "
"%plural{1:'%1'|:with '%1' specifiers}0 is a C++20 extension">,
InGroup<CXX20>;
def warn_cxx17_compat_decomp_decl_spec : Warning<
"decomposition declaration declared "
"%plural{1:'%1'|:with '%1' specifiers}0 "
"is incompatible with C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
def err_decomp_decl_type : Error<
"decomposition declaration cannot be declared with type %0; "
"declared type must be 'auto' or reference to 'auto'">;
def err_decomp_decl_constraint : Error<
"decomposition declaration cannot be declared with constrained 'auto'">;
def err_decomp_decl_parens : Error<
"decomposition declaration cannot be declared with parentheses">;
def err_decomp_decl_template : Error<
"decomposition declaration template not supported">;
def err_decomp_decl_not_alone : Error<
"decomposition declaration must be the only declaration in its group">;
def err_decomp_decl_requires_init : Error<
"decomposition declaration %0 requires an initializer">;
def err_decomp_decl_wrong_number_bindings : Error<
"type %0 decomposes into %3 %plural{1:element|:elements}2, but "
"%select{%plural{0:no|:only %1}1|%1}4 "
"%plural{1:name was|:names were}1 provided">;
def err_decomp_decl_unbindable_type : Error<
"cannot decompose %select{union|non-class, non-array}1 type %2">;
def err_decomp_decl_multiple_bases_with_members : Error<
"cannot decompose class type %1: "
"%select{its base classes %2 and|both it and its base class}0 %3 "
"have non-static data members">;
def err_decomp_decl_ambiguous_base : Error<
"cannot decompose members of ambiguous base class %1 of %0:%2">;
def err_decomp_decl_inaccessible_base : Error<
"cannot decompose members of inaccessible base class %1 of %0">,
AccessControl;
def err_decomp_decl_inaccessible_field : Error<
"cannot decompose %select{private|protected}0 member %1 of %3">,
AccessControl;
def err_decomp_decl_lambda : Error<
"cannot decompose lambda closure type">;
def err_decomp_decl_anon_union_member : Error<
"cannot decompose class type %0 because it has an anonymous "
"%select{struct|union}1 member">;
def err_decomp_decl_std_tuple_element_not_specialized : Error<
"cannot decompose this type; 'std::tuple_element<%0>::type' "
"does not name a type">;
def err_decomp_decl_std_tuple_size_not_constant : Error<
"cannot decompose this type; 'std::tuple_size<%0>::value' "
"is not a valid integral constant expression">;
def note_in_binding_decl_init : Note<
"in implicit initialization of binding declaration %0">;
def err_std_type_trait_not_class_template : Error<
"unsupported standard library implementation: "
"'std::%0' is not a class template">;
// C++ using declarations
def err_using_requires_qualname : Error<
"using declaration requires a qualified name">;
def err_using_typename_non_type : Error<
"'typename' keyword used on a non-type">;
def err_using_dependent_value_is_type : Error<
"dependent using declaration resolved to type without 'typename'">;
def err_using_decl_nested_name_specifier_is_not_class : Error<
"using declaration in class refers into '%0', which is not a class">;
def warn_cxx17_compat_using_decl_non_member_enumerator : Warning<
"member using declaration naming non-class '%0' enumerator is "
"incompatible with C++ standards before C++20">, InGroup<CXXPre20Compat>,
DefaultIgnore;
def err_using_decl_nested_name_specifier_is_current_class : Error<
"using declaration refers to its own class">;
def err_using_decl_nested_name_specifier_is_not_base_class : Error<
"using declaration refers into '%0', which is not a base class of %1">;
def err_using_decl_constructor_not_in_direct_base : Error<
"%0 is not a direct base of %1, cannot inherit constructors">;
def err_using_decl_can_not_refer_to_class_member : Error<
"using declaration cannot refer to class member">;
def warn_cxx17_compat_using_decl_class_member_enumerator : Warning<
"member using declaration naming a non-member enumerator is incompatible "
"with C++ standards before C++20">, InGroup<CXXPre20Compat>, DefaultIgnore;
def err_using_enum_is_dependent : Error<
"using-enum cannot name a dependent type">;
def err_using_enum_not_enum : Error<
"%0 is not an enumerated type">;
def err_ambiguous_inherited_constructor : Error<
"constructor of %0 inherited from multiple base class subobjects">;
def note_ambiguous_inherited_constructor_using : Note<
"inherited from base class %0 here">;
def note_using_decl_class_member_workaround : Note<
"use %select{an alias declaration|a typedef declaration|a reference|"
"a const variable|a constexpr variable}0 instead">;
def err_using_decl_can_not_refer_to_namespace : Error<
"using declaration cannot refer to a namespace">;
def warn_cxx17_compat_using_decl_scoped_enumerator: Warning<
"using declaration naming a scoped enumerator is incompatible with "
"C++ standards before C++20">, InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_using_decl_scoped_enumerator : ExtWarn<
"using declaration naming a scoped enumerator is a C++20 extension">,
InGroup<CXX20>;
def err_using_decl_constructor : Error<
"using declaration cannot refer to a constructor">;
def warn_cxx98_compat_using_decl_constructor : Warning<
"inheriting constructors are incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_using_decl_destructor : Error<
"using declaration cannot refer to a destructor">;
def err_using_decl_template_id : Error<
"using declaration cannot refer to a template specialization">;
def note_using_decl_target : Note<"target of using declaration">;
def note_using_decl_conflict : Note<"conflicting declaration">;
def err_using_decl_redeclaration : Error<"redeclaration of using declaration">;
def err_using_decl_conflict : Error<
"target of using declaration conflicts with declaration already in scope">;
def err_using_decl_conflict_reverse : Error<
"declaration conflicts with target of using declaration already in scope">;
def note_using_decl : Note<"%select{|previous }0using declaration">;
def err_using_decl_redeclaration_expansion : Error<
"using declaration pack expansion at block scope produces multiple values">;
def err_use_of_empty_using_if_exists : Error<
"reference to unresolved using declaration">;
def note_empty_using_if_exists_here : Note<
"using declaration annotated with 'using_if_exists' here">;
def err_using_if_exists_on_ctor : Error<
"'using_if_exists' attribute cannot be applied to an inheriting constructor">;
def err_using_enum_decl_redeclaration : Error<
"redeclaration of using-enum declaration">;
def note_using_enum_decl : Note<"%select{|previous }0using-enum declaration">;
def warn_access_decl_deprecated : Warning<
"access declarations are deprecated; use using declarations instead">,
InGroup<Deprecated>;
def err_access_decl : Error<
"ISO C++11 does not allow access declarations; "
"use using declarations instead">;
def warn_deprecated_copy : Warning<
"definition of implicit copy %select{constructor|assignment operator}1 "
"for %0 is deprecated because it has a user-declared copy "
"%select{assignment operator|constructor}1">,
InGroup<DeprecatedCopy>, DefaultIgnore;
def warn_deprecated_copy_with_dtor : Warning<
"definition of implicit copy %select{constructor|assignment operator}1 "
"for %0 is deprecated because it has a user-declared destructor">,
InGroup<DeprecatedCopyWithDtor>, DefaultIgnore;
def warn_deprecated_copy_with_user_provided_copy: Warning<
"definition of implicit copy %select{constructor|assignment operator}1 "
"for %0 is deprecated because it has a user-provided copy "
"%select{assignment operator|constructor}1">,
InGroup<DeprecatedCopyWithUserProvidedCopy>, DefaultIgnore;
def warn_deprecated_copy_with_user_provided_dtor : Warning<
"definition of implicit copy %select{constructor|assignment operator}1 "
"for %0 is deprecated because it has a user-provided destructor">,
InGroup<DeprecatedCopyWithUserProvidedDtor>, DefaultIgnore;
def warn_cxx17_compat_exception_spec_in_signature : Warning<
"mangled name of %0 will change in C++17 due to non-throwing exception "
"specification in function signature">, InGroup<CXX17CompatMangling>;
def warn_global_constructor : Warning<
"declaration requires a global constructor">,
InGroup<GlobalConstructors>, DefaultIgnore;
def warn_global_destructor : Warning<
"declaration requires a global destructor">,
InGroup<GlobalConstructors>, DefaultIgnore;
def warn_exit_time_destructor : Warning<
"declaration requires an exit-time destructor">,
InGroup<ExitTimeDestructors>, DefaultIgnore;
def err_invalid_thread : Error<
"'%0' is only allowed on variable declarations">;
def err_thread_non_global : Error<
"'%0' variables must have global storage">;
def err_thread_unsupported : Error<
"thread-local storage is not supported for the current target">;
// FIXME: Combine fallout warnings to just one warning.
def warn_maybe_falloff_nonvoid_function : Warning<
"non-void function does not return a value in all control paths">,
InGroup<ReturnType>;
def warn_falloff_nonvoid_function : Warning<
"non-void function does not return a value">,
InGroup<ReturnType>;
def err_maybe_falloff_nonvoid_block : Error<
"non-void block does not return a value in all control paths">;
def err_falloff_nonvoid_block : Error<
"non-void block does not return a value">;
def warn_maybe_falloff_nonvoid_coroutine : Warning<
"non-void coroutine does not return a value in all control paths">,
InGroup<ReturnType>;
def warn_falloff_nonvoid_coroutine : Warning<
"non-void coroutine does not return a value">,
InGroup<ReturnType>;
def warn_suggest_noreturn_function : Warning<
"%select{function|method}0 %1 could be declared with attribute 'noreturn'">,
InGroup<MissingNoreturn>, DefaultIgnore;
def warn_suggest_noreturn_block : Warning<
"block could be declared with attribute 'noreturn'">,
InGroup<MissingNoreturn>, DefaultIgnore;
// Unreachable code.
def warn_unreachable : Warning<
"code will never be executed">,
InGroup<UnreachableCode>, DefaultIgnore;
def warn_unreachable_break : Warning<
"'break' will never be executed">,
InGroup<UnreachableCodeBreak>, DefaultIgnore;
def warn_unreachable_return : Warning<
"'return' will never be executed">,
InGroup<UnreachableCodeReturn>, DefaultIgnore;
def warn_unreachable_loop_increment : Warning<
"loop will run at most once (loop increment never executed)">,
InGroup<UnreachableCodeLoopIncrement>, DefaultIgnore;
def warn_unreachable_fallthrough_attr : Warning<
"fallthrough annotation in unreachable code">,
InGroup<UnreachableCodeFallthrough>, DefaultIgnore;
def note_unreachable_silence : Note<
"silence by adding parentheses to mark code as explicitly dead">;
def warn_unreachable_association : Warning<
"due to lvalue conversion of the controlling expression, association of type "
"%0 will never be selected because it is %select{of array type|qualified}1">,
InGroup<UnreachableCodeGenericAssoc>;
/// Built-in functions.
def ext_implicit_lib_function_decl : ExtWarn<
"implicitly declaring library function '%0' with type %1">,
InGroup<ImplicitFunctionDeclare>;
def ext_implicit_lib_function_decl_c99 : ExtWarn<
"call to undeclared library function '%0' with type %1; ISO C99 and later "
"do not support implicit function declarations">,
InGroup<ImplicitFunctionDeclare>;
def note_include_header_or_declare : Note<
"include the header <%0> or explicitly provide a declaration for '%1'">;
def note_previous_builtin_declaration : Note<"%0 is a builtin with type %1">;
def warn_implicit_decl_no_jmp_buf
: Warning<"declaration of built-in function '%0' requires the declaration"
" of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.">,
InGroup<DiagGroup<"incomplete-setjmp-declaration">>;
def warn_implicit_decl_requires_sysheader : Warning<
"declaration of built-in function '%1' requires inclusion of the header <%0>">,
InGroup<BuiltinRequiresHeader>;
def warn_redecl_library_builtin : Warning<
"incompatible redeclaration of library function %0">,
InGroup<DiagGroup<"incompatible-library-redeclaration">>;
def err_builtin_definition : Error<"definition of builtin function %0">;
def err_builtin_redeclare : Error<"cannot redeclare builtin function %0">;
def err_arm_invalid_specialreg : Error<"invalid special register for builtin">;
def err_arm_invalid_coproc : Error<"coprocessor %0 must be configured as "
"%select{GCP|CDE}1">;
def err_invalid_cpu_supports : Error<"invalid cpu feature string for builtin">;
def err_invalid_cpu_is : Error<"invalid cpu name for builtin">;
def err_invalid_cpu_specific_dispatch_value : Error<
"invalid option '%0' for %select{cpu_specific|cpu_dispatch}1">;
def warn_builtin_unknown : Warning<"use of unknown builtin %0">,
InGroup<ImplicitFunctionDeclare>, DefaultError;
def warn_cstruct_memaccess : Warning<
"%select{destination for|source of|first operand of|second operand of}0 this "
"%1 call is a pointer to record %2 that is not trivial to "
"%select{primitive-default-initialize|primitive-copy}3">,
InGroup<NonTrivialMemaccess>;
def note_nontrivial_field : Note<
"field is non-trivial to %select{copy|default-initialize}0">;
def err_non_trivial_c_union_in_invalid_context : Error<
"cannot %select{"
"use type %1 for a function/method parameter|"
"use type %1 for function/method return|"
"default-initialize an object of type %1|"
"declare an automatic variable of type %1|"
"copy-initialize an object of type %1|"
"assign to a variable of type %1|"
"construct an automatic compound literal of type %1|"
"capture a variable of type %1|"
"cannot use volatile type %1 where it causes an lvalue-to-rvalue conversion"
"}3 "
"since it %select{contains|is}2 a union that is non-trivial to "
"%select{default-initialize|destruct|copy}0">;
def note_non_trivial_c_union : Note<
"%select{%2 has subobjects that are|%3 has type %2 that is}0 "
"non-trivial to %select{default-initialize|destruct|copy}1">;
def warn_dyn_class_memaccess : Warning<
"%select{destination for|source of|first operand of|second operand of}0 this "
"%1 call is a pointer to %select{|class containing a }2dynamic class %3; "
"vtable pointer will be %select{overwritten|copied|moved|compared}4">,
InGroup<DynamicClassMemaccess>;
def note_bad_memaccess_silence : Note<
"explicitly cast the pointer to silence this warning">;
def warn_sizeof_pointer_expr_memaccess : Warning<
"'%0' call operates on objects of type %1 while the size is based on a "
"different type %2">,
InGroup<SizeofPointerMemaccess>;
def warn_sizeof_pointer_expr_memaccess_note : Note<
"did you mean to %select{dereference the argument to 'sizeof' (and multiply "
"it by the number of elements)|remove the addressof in the argument to "
"'sizeof' (and multiply it by the number of elements)|provide an explicit "
"length}0?">;
def warn_sizeof_pointer_type_memaccess : Warning<
"argument to 'sizeof' in %0 call is the same pointer type %1 as the "
"%select{destination|source}2; expected %3 or an explicit length">,
InGroup<SizeofPointerMemaccess>;
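// Illustrative sketch (not part of the diagnostic table): a C++ fragment that is
// expected to trigger warn_sizeof_pointer_type_memaccess, since the 'sizeof'
// operand has the same pointer type as the destination. The name 'copy_row' is
// invented for illustration.
//
//   void copy_row(char *dst, const char *src) {
//     // sizeof(dst) is the size of the pointer, not of the buffer it points to.
//     __builtin_memcpy(dst, src, sizeof(dst));
//   }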
def warn_strlcpycat_wrong_size : Warning<
"size argument in %0 call appears to be size of the source; "
"expected the size of the destination">,
InGroup<DiagGroup<"strlcpy-strlcat-size">>;
def note_strlcpycat_wrong_size : Note<
"change size argument to be the size of the destination">;
def warn_memsize_comparison : Warning<
"size argument in %0 call is a comparison">,
InGroup<DiagGroup<"memsize-comparison">>;
def note_memsize_comparison_paren : Note<
"did you mean to compare the result of %0 instead?">;
def note_memsize_comparison_cast_silence : Note<
"explicitly cast the argument to size_t to silence this warning">;
def warn_suspicious_sizeof_memset : Warning<
"%select{'size' argument to memset is '0'|"
"setting buffer to a 'sizeof' expression}0"
"; did you mean to transpose the last two arguments?">,
InGroup<MemsetTransposedArgs>;
def note_suspicious_sizeof_memset_silence : Note<
"%select{parenthesize the third argument|"
"cast the second argument to 'int'}0 to silence">;
def warn_suspicious_bzero_size : Warning<"'size' argument to bzero is '0'">,
InGroup<SuspiciousBzero>;
def note_suspicious_bzero_size_silence : Note<
"parenthesize the second argument to silence">;
def warn_strncat_large_size : Warning<
"the value of the size argument in 'strncat' is too large, might lead to a "
"buffer overflow">, InGroup<StrncatSize>;
def warn_strncat_src_size : Warning<"size argument in 'strncat' call appears "
"to be size of the source">, InGroup<StrncatSize>;
def warn_strncat_wrong_size : Warning<
"the value of the size argument to 'strncat' is wrong">, InGroup<StrncatSize>;
def note_strncat_wrong_size : Note<
"change the argument to be the free space in the destination buffer minus "
"the terminating null byte">;
def warn_assume_side_effects : Warning<
"the argument to %0 has side effects that will be discarded">,
InGroup<DiagGroup<"assume">>;
def warn_assume_attribute_string_unknown : Warning<
"unknown assumption string '%0'; attribute is potentially ignored">,
InGroup<UnknownAssumption>;
def warn_assume_attribute_string_unknown_suggested : Warning<
"unknown assumption string '%0' may be misspelled; attribute is potentially "
"ignored, did you mean '%1'?">,
InGroup<MisspelledAssumption>;
def warn_builtin_chk_overflow : Warning<
"'%0' will always overflow; destination buffer has size %1,"
" but size argument is %2">,
InGroup<DiagGroup<"builtin-memcpy-chk-size">>;
def warn_fortify_source_overflow
: Warning<warn_builtin_chk_overflow.Summary>, InGroup<FortifySource>;
def warn_fortify_source_size_mismatch : Warning<
"'%0' size argument is too large; destination buffer has size %1,"
" but size argument is %2">, InGroup<FortifySource>;
def warn_fortify_strlen_overflow: Warning<
"'%0' will always overflow; destination buffer has size %1,"
" but the source string has length %2 (including NUL byte)">,
InGroup<FortifySource>;
def warn_fortify_source_format_overflow : Warning<
"'%0' will always overflow; destination buffer has size %1,"
" but format string expands to at least %2">,
InGroup<FortifySource>;
def warn_fortify_scanf_overflow : Warning<
"'%0' may overflow; destination buffer in argument %1 has size "
"%2, but the corresponding specifier may require size %3">,
InGroup<FortifySource>;
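// Illustrative sketch (not part of the diagnostic table): a copy whose source
// length and destination size are both known constants, which recent Clang
// versions are expected to flag under -Wfortify-source
// (warn_fortify_strlen_overflow). The names 'small' and 'fill' are invented.
//
//   char small[4];
//   void fill() {
//     __builtin_strcpy(small, "hello");   // source needs 6 bytes including the NUL byte
//   }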
def err_function_start_invalid_type: Error<
"argument must be a function">;
/// main()
// static main() is not an error in C, just in C++.
def warn_static_main : Warning<"'main' should not be declared static">,
InGroup<Main>;
def err_static_main : Error<"'main' is not allowed to be declared static">;
def err_inline_main : Error<"'main' is not allowed to be declared inline">;
def ext_variadic_main : ExtWarn<
"'main' is not allowed to be declared variadic">, InGroup<Main>;
def ext_noreturn_main : ExtWarn<
"'main' is not allowed to be declared _Noreturn">, InGroup<Main>;
def note_main_remove_noreturn : Note<"remove '_Noreturn'">;
def err_constexpr_main : Error<
"'main' is not allowed to be declared %select{constexpr|consteval}0">;
def err_deleted_main : Error<"'main' is not allowed to be deleted">;
def err_mainlike_template_decl : Error<"%0 cannot be a template">;
def err_main_returns_nonint : Error<"'main' must return 'int'">;
def ext_main_returns_nonint : ExtWarn<"return type of 'main' is not 'int'">,
InGroup<MainReturnType>;
def note_main_change_return_type : Note<"change return type to 'int'">;
def err_main_surplus_args : Error<"too many parameters (%0) for 'main': "
"must be 0, 2, or 3">;
def warn_main_one_arg : Warning<"only one parameter on 'main' declaration">,
InGroup<Main>;
def err_main_arg_wrong : Error<"%select{first|second|third|fourth}0 "
"parameter of 'main' (%select{argument count|argument array|environment|"
"platform-specific data}0) must be of type %1">;
def warn_main_returns_bool_literal : Warning<"bool literal returned from "
"'main'">, InGroup<Main>;
def err_main_global_variable :
Error<"main cannot be declared as global variable">;
def warn_main_redefined : Warning<"variable named 'main' with external linkage "
"has undefined behavior">, InGroup<Main>;
def ext_main_used : Extension<
"ISO C++ does not allow 'main' to be used by a program">, InGroup<Main>;
/// parser diagnostics
def ext_no_declarators : ExtWarn<"declaration does not declare anything">,
InGroup<MissingDeclarations>;
def err_no_declarators : Error<"declaration does not declare anything">;
def ext_typedef_without_a_name : ExtWarn<"typedef requires a name">,
InGroup<MissingDeclarations>;
def err_typedef_not_identifier : Error<"typedef name must be an identifier">;
def ext_non_c_like_anon_struct_in_typedef : ExtWarn<
"anonymous non-C-compatible type given name for linkage purposes "
"by %select{typedef|alias}0 declaration; "
"add a tag name here">, InGroup<DiagGroup<"non-c-typedef-for-linkage">>;
def err_non_c_like_anon_struct_in_typedef : Error<
"anonymous non-C-compatible type given name for linkage purposes "
"by %select{typedef|alias}0 declaration after its linkage was computed; "
"add a tag name here to establish linkage prior to definition">;
def err_typedef_changes_linkage : Error<
"unsupported: anonymous type given name for linkage purposes "
"by %select{typedef|alias}0 declaration after its linkage was computed; "
"add a tag name here to establish linkage prior to definition">;
def note_non_c_like_anon_struct : Note<
"type is not C-compatible due to this "
"%select{base class|default member initializer|lambda expression|"
"friend declaration|member declaration}0">;
def note_typedef_for_linkage_here : Note<
"type is given name %0 for linkage purposes by this "
"%select{typedef|alias}1 declaration">;
def err_statically_allocated_object : Error<
"interface type cannot be statically allocated">;
def err_object_cannot_be_passed_returned_by_value : Error<
"interface type %1 cannot be %select{returned|passed}0 by value"
"; did you forget * in %1?">;
def err_parameters_retval_cannot_have_fp16_type : Error<
"%select{parameters|function return value}0 cannot have __fp16 type; did you forget * ?">;
def err_opencl_half_load_store : Error<
"%select{loading directly from|assigning directly to}0 pointer to type %1 requires "
"cl_khr_fp16. Use vector data %select{load|store}0 builtin functions instead">;
def err_opencl_cast_to_half : Error<"casting to type %0 is not allowed">;
def err_opencl_half_declaration : Error<
"declaring variable of type %0 is not allowed">;
def err_opencl_invalid_param : Error<
"declaring function parameter of type %0 is not allowed%select{; did you forget * ?|}1">;
def err_opencl_invalid_return : Error<
"declaring function return value of type %0 is not allowed %select{; did you forget * ?|}1">;
def warn_enum_value_overflow : Warning<"overflow in enumeration value">;
def warn_pragma_options_align_reset_failed : Warning<
"#pragma options align=reset failed: %0">,
InGroup<IgnoredPragmas>;
def err_pragma_options_align_mac68k_target_unsupported : Error<
"mac68k alignment pragma is not supported on this target">;
def warn_pragma_align_not_xl_compatible : Warning<
"#pragma align(packed) may not be compatible with objects generated with AIX XL C/C++">,
InGroup<AIXCompat>;
def warn_pragma_pack_invalid_alignment : Warning<
"expected #pragma pack parameter to be '1', '2', '4', '8', or '16'">,
InGroup<IgnoredPragmas>;
def err_pragma_pack_invalid_alignment : Error<
warn_pragma_pack_invalid_alignment.Summary>;
def warn_pragma_pack_non_default_at_include : Warning<
"non-default #pragma pack value changes the alignment of struct or union "
"members in the included file">, InGroup<PragmaPackSuspiciousInclude>,
DefaultIgnore;
def warn_pragma_pack_modified_after_include : Warning<
"the current #pragma pack alignment value is modified in the included "
"file">, InGroup<PragmaPack>;
def warn_pragma_pack_no_pop_eof : Warning<"unterminated "
"'#pragma pack (push, ...)' at end of file">, InGroup<PragmaPack>;
def note_pragma_pack_here : Note<
"previous '#pragma pack' directive that modifies alignment is here">;
def note_pragma_pack_pop_instead_reset : Note<
"did you intend to use '#pragma pack (pop)' instead of '#pragma pack()'?">;
// Follow the Microsoft implementation.
def warn_pragma_pack_show : Warning<"value of #pragma pack(show) == %0">;
def warn_pragma_pack_pop_identifier_and_alignment : Warning<
"specifying both a name and alignment to 'pop' is undefined">;
def warn_pragma_pop_failed : Warning<"#pragma %0(pop, ...) failed: %1">,
InGroup<IgnoredPragmas>;
def err_pragma_fc_pp_scope : Error<
"'#pragma float_control push/pop' can only appear at file or namespace scope "
"or within a language linkage specification">;
def err_pragma_fc_noprecise_requires_nofenv : Error<
"'#pragma float_control(precise, off)' is illegal when fenv_access is enabled">;
def err_pragma_fc_except_requires_precise : Error<
"'#pragma float_control(except, on)' is illegal when precise is disabled">;
def err_pragma_fc_noprecise_requires_noexcept : Error<
"'#pragma float_control(precise, off)' is illegal when except is enabled">;
def err_pragma_fenv_requires_precise : Error<
"'#pragma STDC FENV_ACCESS ON' is illegal when precise is disabled">;
def warn_cxx_ms_struct :
Warning<"ms_struct may not produce Microsoft-compatible layouts for classes "
"with base classes or virtual functions">,
DefaultError, InGroup<IncompatibleMSStruct>;
def err_pragma_pack_identifer_not_supported : Error<
"specifying an identifier within `#pragma pack` is not supported on this target">;
def err_section_conflict : Error<"%0 causes a section type conflict with %1">;
def err_no_base_classes : Error<"invalid use of '__super', %0 has no base classes">;
def err_invalid_super_scope : Error<"invalid use of '__super', "
"this keyword can only be used inside class or member function scope">;
def err_super_in_lambda_unsupported : Error<
"use of '__super' inside a lambda is unsupported">;
def err_pragma_expected_file_scope : Error<
"'#pragma %0' can only appear at file scope">;
def err_pragma_alloc_text_c_linkage: Error<
"'#pragma alloc_text' is applicable only to functions with C linkage">;
def err_pragma_alloc_text_not_function: Error<
"'#pragma alloc_text' is applicable only to functions">;
def warn_pragma_unused_undeclared_var : Warning<
"undeclared variable %0 used as an argument for '#pragma unused'">,
InGroup<IgnoredPragmas>;
def warn_atl_uuid_deprecated : Warning<
"specifying 'uuid' as an ATL attribute is deprecated; use __declspec instead">,
InGroup<DeprecatedDeclarations>;
def warn_pragma_unused_expected_var_arg : Warning<
"only variables can be arguments to '#pragma unused'">,
InGroup<IgnoredPragmas>;
def err_pragma_push_visibility_mismatch : Error<
"#pragma visibility push with no matching #pragma visibility pop">;
def note_surrounding_namespace_ends_here : Note<
"surrounding namespace with visibility attribute ends here">;
def err_pragma_pop_visibility_mismatch : Error<
"#pragma visibility pop with no matching #pragma visibility push">;
def note_surrounding_namespace_starts_here : Note<
"surrounding namespace with visibility attribute starts here">;
def err_pragma_loop_invalid_argument_type : Error<
"invalid argument of type %0; expected an integer type">;
def err_pragma_loop_invalid_argument_value : Error<
"%select{invalid value '%0'; must be positive|value '%0' is too large}1">;
def err_pragma_loop_compatibility : Error<
"%select{incompatible|duplicate}0 directives '%1' and '%2'">;
def err_pragma_loop_precedes_nonloop : Error<
"expected a for, while, or do-while loop to follow '%0'">;
def err_pragma_attribute_matcher_subrule_contradicts_rule : Error<
"redundant attribute subject matcher sub-rule '%0'; '%1' already matches "
"those declarations">;
def err_pragma_attribute_matcher_negated_subrule_contradicts_subrule : Error<
"negated attribute subject matcher sub-rule '%0' contradicts sub-rule '%1'">;
def err_pragma_attribute_invalid_matchers : Error<
"attribute %0 can't be applied to %1">;
def err_pragma_attribute_stack_mismatch : Error<
"'#pragma clang attribute %select{%1.|}0pop' with no matching"
" '#pragma clang attribute %select{%1.|}0push'">;
def warn_pragma_attribute_unused : Warning<
"unused attribute %0 in '#pragma clang attribute push' region">,
InGroup<PragmaClangAttribute>;
def note_pragma_attribute_region_ends_here : Note<
"'#pragma clang attribute push' regions ends here">;
def err_pragma_attribute_no_pop_eof : Error<"unterminated "
"'#pragma clang attribute push' at end of file">;
def note_pragma_attribute_applied_decl_here : Note<
"when applied to this declaration">;
def err_pragma_attr_attr_no_push : Error<
"'#pragma clang attribute' attribute with no matching "
"'#pragma clang attribute push'">;
/// Objective-C parser diagnostics
def err_duplicate_class_def : Error<
"duplicate interface definition for class %0">;
def err_undef_superclass : Error<
"cannot find interface declaration for %0, superclass of %1">;
def err_forward_superclass : Error<
"attempting to use the forward class %0 as superclass of %1">;
def err_no_nsconstant_string_class : Error<
"cannot find interface declaration for %0">;
def err_recursive_superclass : Error<
"trying to recursively use %0 as superclass of %1">;
def err_conflicting_aliasing_type : Error<"conflicting types for alias %0">;
def warn_undef_interface : Warning<"cannot find interface declaration for %0">;
def warn_duplicate_protocol_def : Warning<
"duplicate protocol definition of %0 is ignored">,
InGroup<DiagGroup<"duplicate-protocol">>;
def err_protocol_has_circular_dependency : Error<
"protocol has circular dependency">;
def err_undeclared_protocol : Error<"cannot find protocol declaration for %0">;
def warn_undef_protocolref : Warning<"cannot find protocol definition for %0">;
def err_atprotocol_protocol : Error<
"@protocol is using a forward protocol declaration of %0">;
def warn_readonly_property : Warning<
"attribute 'readonly' of property %0 restricts attribute "
"'readwrite' of property inherited from %1">,
InGroup<PropertyAttr>;
def warn_property_attribute : Warning<
"'%1' attribute on property %0 does not match the property inherited from %2">,
InGroup<PropertyAttr>;
def warn_property_types_are_incompatible : Warning<
"property type %0 is incompatible with type %1 inherited from %2">,
InGroup<DiagGroup<"incompatible-property-type">>;
def warn_protocol_property_mismatch : Warning<
"property %select{of type %1|with attribute '%1'|without attribute '%1'|with "
"getter %1|with setter %1}0 was selected for synthesis">,
InGroup<DiagGroup<"protocol-property-synthesis-ambiguity">>;
def err_protocol_property_mismatch: Error<warn_protocol_property_mismatch.Summary>;
def err_undef_interface : Error<"cannot find interface declaration for %0">;
def err_category_forward_interface : Error<
"cannot define %select{category|class extension}0 for undefined class %1">;
def err_class_extension_after_impl : Error<
"cannot declare class extension for %0 after class implementation">;
def note_implementation_declared : Note<
"class implementation is declared here">;
def note_while_in_implementation : Note<
"detected while default synthesizing properties in class implementation">;
def note_class_declared : Note<
"class is declared here">;
def note_receiver_class_declared : Note<
"receiver is instance of class declared here">;
def note_receiver_expr_here : Note<
"receiver expression is here">;
def note_receiver_is_id : Note<
"receiver is treated with 'id' type for purpose of method lookup">;
def note_suppressed_class_declare : Note<
"class with specified objc_requires_property_definitions attribute is declared here">;
def err_objc_root_class_subclass : Error<
"objc_root_class attribute may only be specified on a root class declaration">;
def err_restricted_superclass_mismatch : Error<
"cannot subclass a class that was declared with the "
"'objc_subclassing_restricted' attribute">;
def err_class_stub_subclassing_mismatch : Error<
"'objc_class_stub' attribute cannot be specified on a class that does not "
"have the 'objc_subclassing_restricted' attribute">;
def err_implementation_of_class_stub : Error<
"cannot declare implementation of a class declared with the "
"'objc_class_stub' attribute">;
def warn_objc_root_class_missing : Warning<
"class %0 defined without specifying a base class">,
InGroup<ObjCRootClass>;
def err_objc_runtime_visible_category : Error<
"cannot implement a category for class %0 that is only visible via the "
"Objective-C runtime">;
def err_objc_runtime_visible_subclass : Error<
"cannot implement subclass %0 of a superclass %1 that is only visible via the "
"Objective-C runtime">;
def note_objc_needs_superclass : Note<
"add a super class to fix this problem">;
def err_conflicting_super_class : Error<"conflicting super class name %0">;
def err_dup_implementation_class : Error<"reimplementation of class %0">;
def err_dup_implementation_category : Error<
"reimplementation of category %1 for class %0">;
def err_conflicting_ivar_type : Error<
"instance variable %0 has conflicting type%diff{: $ vs $|}1,2">;
def err_duplicate_ivar_declaration : Error<
"instance variable is already declared">;
def warn_on_superclass_use : Warning<
"class implementation may not have super class">;
def err_conflicting_ivar_bitwidth : Error<
"instance variable %0 has conflicting bit-field width">;
def err_conflicting_ivar_name : Error<
"conflicting instance variable names: %0 vs %1">;
def err_inconsistent_ivar_count : Error<
"inconsistent number of instance variables specified">;
def warn_undef_method_impl : Warning<"method definition for %0 not found">,
InGroup<DiagGroup<"incomplete-implementation">>;
def warn_objc_boxing_invalid_utf8_string : Warning<
"string is ill-formed as UTF-8 and will become a null %0 when boxed">,
InGroup<ObjCBoxing>;
def err_objc_non_runtime_protocol_in_protocol_expr : Error<
"cannot use a protocol declared 'objc_non_runtime_protocol' in a @protocol expression">;
def err_objc_direct_on_protocol : Error<
"'objc_direct' attribute cannot be applied to %select{methods|properties}0 "
"declared in an Objective-C protocol">;
def err_objc_direct_duplicate_decl : Error<
"%select{|direct }0%select{method|property}1 declaration conflicts "
"with previous %select{|direct }2declaration of %select{method|property}1 %3">;
def err_objc_direct_impl_decl_mismatch : Error<
"direct method was declared in %select{the primary interface|an extension|a category}0 "
"but is implemented in %select{the primary interface|a category|a different category}1">;
def err_objc_direct_missing_on_decl : Error<
"direct method implementation was previously declared not direct">;
def err_objc_direct_on_override : Error<
"methods that %select{override superclass methods|implement protocol requirements}0 cannot be direct">;
def err_objc_override_direct_method : Error<
"cannot override a method that is declared direct by a superclass">;
def warn_objc_direct_ignored : Warning<
"%0 attribute isn't implemented by this Objective-C runtime">,
InGroup<IgnoredAttributes>;
def warn_objc_direct_property_ignored : Warning<
"direct attribute on property %0 ignored (not implemented by this Objective-C runtime)">,
InGroup<IgnoredAttributes>;
def err_objc_direct_dynamic_property : Error<
"direct property cannot be @dynamic">;
def err_objc_direct_protocol_conformance : Error<
"%select{category %1|class extension}0 cannot conform to protocol %2 because "
"of direct members declared in interface %3">;
def note_direct_member_here : Note<"direct member declared here">;
def warn_conflicting_overriding_ret_types : Warning<
"conflicting return type in "
"declaration of %0%diff{: $ vs $|}1,2">,
InGroup<OverridingMethodMismatch>, DefaultIgnore;
def warn_conflicting_ret_types : Warning<
"conflicting return type in "
"implementation of %0%diff{: $ vs $|}1,2">,
InGroup<MismatchedReturnTypes>;
def warn_conflicting_overriding_ret_type_modifiers : Warning<
"conflicting distributed object modifiers on return type "
"in declaration of %0">,
InGroup<OverridingMethodMismatch>, DefaultIgnore;
def warn_conflicting_ret_type_modifiers : Warning<
"conflicting distributed object modifiers on return type "
"in implementation of %0">,
InGroup<DistributedObjectModifiers>;
def warn_non_covariant_overriding_ret_types : Warning<
"conflicting return type in "
"declaration of %0: %1 vs %2">,
InGroup<OverridingMethodMismatch>, DefaultIgnore;
def warn_non_covariant_ret_types : Warning<
"conflicting return type in "
"implementation of %0: %1 vs %2">,
InGroup<MethodSignatures>, DefaultIgnore;
def warn_conflicting_overriding_param_types : Warning<
"conflicting parameter types in "
"declaration of %0%diff{: $ vs $|}1,2">,
InGroup<OverridingMethodMismatch>, DefaultIgnore;
def warn_conflicting_param_types : Warning<
"conflicting parameter types in "
"implementation of %0%diff{: $ vs $|}1,2">,
InGroup<MismatchedParameterTypes>;
def warn_conflicting_param_modifiers : Warning<
"conflicting distributed object modifiers on parameter type "
"in implementation of %0">,
InGroup<DistributedObjectModifiers>;
def warn_conflicting_overriding_param_modifiers : Warning<
"conflicting distributed object modifiers on parameter type "
"in declaration of %0">,
InGroup<OverridingMethodMismatch>, DefaultIgnore;
def warn_non_contravariant_overriding_param_types : Warning<
"conflicting parameter types in "
"declaration of %0: %1 vs %2">,
InGroup<OverridingMethodMismatch>, DefaultIgnore;
def warn_non_contravariant_param_types : Warning<
"conflicting parameter types in "
"implementation of %0: %1 vs %2">,
InGroup<MethodSignatures>, DefaultIgnore;
def warn_conflicting_overriding_variadic :Warning<
"conflicting variadic declaration of method and its "
"implementation">,
InGroup<OverridingMethodMismatch>, DefaultIgnore;
def warn_conflicting_variadic :Warning<
"conflicting variadic declaration of method and its "
"implementation">;
def warn_category_method_impl_match:Warning<
"category is implementing a method which will also be implemented"
" by its primary class">, InGroup<ObjCProtocolMethodImpl>;
def warn_implements_nscopying : Warning<
"default assign attribute on property %0 which implements "
"NSCopying protocol is not appropriate with -fobjc-gc[-only]">;
def warn_multiple_method_decl : Warning<"multiple methods named %0 found">,
InGroup<ObjCMultipleMethodNames>;
def warn_strict_multiple_method_decl : Warning<
"multiple methods named %0 found">, InGroup<StrictSelector>, DefaultIgnore;
def warn_accessor_property_type_mismatch : Warning<
"type of property %0 does not match type of accessor %1">;
def note_conv_function_declared_at : Note<"type conversion function declared here">;
def note_method_declared_at : Note<"method %0 declared here">;
def note_direct_method_declared_at : Note<"direct method %0 declared here">;
def note_property_attribute : Note<"property %0 is declared "
"%select{deprecated|unavailable|partial}1 here">;
def err_setter_type_void : Error<"type of setter must be void">;
def err_duplicate_method_decl : Error<"duplicate declaration of method %0">;
def warn_duplicate_method_decl :
Warning<"multiple declarations of method %0 found and ignored">,
InGroup<MethodDuplicate>, DefaultIgnore;
def warn_objc_cdirective_format_string :
Warning<"using %0 directive in %select{NSString|CFString}1 "
"which is being passed as a formatting argument to the formatting "
"%select{method|CFfunction}2">,
InGroup<ObjCCStringFormat>, DefaultIgnore;
def err_objc_var_decl_inclass :
Error<"cannot declare variable inside @interface or @protocol">;
def err_missing_method_context : Error<
"missing context for method declaration">;
def err_objc_property_attr_mutually_exclusive : Error<
"property attributes '%0' and '%1' are mutually exclusive">;
def err_objc_property_requires_object : Error<
"property with '%0' attribute must be of object type">;
def warn_objc_property_assign_on_object : Warning<
"'assign' property of object type may become a dangling reference; consider using 'unsafe_unretained'">,
InGroup<ObjCPropertyAssignOnObjectType>, DefaultIgnore;
def warn_objc_property_no_assignment_attribute : Warning<
"no 'assign', 'retain', or 'copy' attribute is specified - "
"'assign' is assumed">,
InGroup<ObjCPropertyNoAttribute>;
def warn_objc_isa_use : Warning<
"direct access to Objective-C's isa is deprecated in favor of "
"object_getClass()">, InGroup<DeprecatedObjCIsaUsage>;
def warn_objc_isa_assign : Warning<
"assignment to Objective-C's isa is deprecated in favor of "
"object_setClass()">, InGroup<DeprecatedObjCIsaUsage>;
def warn_objc_pointer_masking : Warning<
"bitmasking for introspection of Objective-C object pointers is strongly "
"discouraged">,
InGroup<ObjCPointerIntrospect>;
def warn_objc_pointer_masking_performSelector : Warning<warn_objc_pointer_masking.Summary>,
InGroup<ObjCPointerIntrospectPerformSelector>;
def warn_objc_property_default_assign_on_object : Warning<
"default property attribute 'assign' not appropriate for object">,
InGroup<ObjCPropertyNoAttribute>;
def warn_property_attr_mismatch : Warning<
"property attribute in class extension does not match the primary class">,
InGroup<PropertyAttr>;
def warn_property_implicitly_mismatched : Warning <
"primary property declaration is implicitly strong while redeclaration "
"in class extension is weak">,
InGroup<DiagGroup<"objc-property-implicit-mismatch">>;
def warn_objc_property_copy_missing_on_block : Warning<
"'copy' attribute must be specified for the block property "
"when -fobjc-gc-only is specified">;
def warn_objc_property_retain_of_block : Warning<
"retain'ed block property does not copy the block "
"- use copy attribute instead">, InGroup<ObjCRetainBlockProperty>;
def warn_objc_readonly_property_has_setter : Warning<
"setter cannot be specified for a readonly property">,
InGroup<ObjCReadonlyPropertyHasSetter>;
def warn_atomic_property_rule : Warning<
"writable atomic property %0 cannot pair a synthesized %select{getter|setter}1 "
"with a user defined %select{getter|setter}2">,
InGroup<DiagGroup<"atomic-property-with-user-defined-accessor">>;
def note_atomic_property_fixup_suggest : Note<"setter and getter must both be "
"synthesized, or both be user defined, or the property must be nonatomic">;
def err_atomic_property_nontrivial_assign_op : Error<
"atomic property of reference type %0 cannot have non-trivial assignment"
" operator">;
def warn_cocoa_naming_owned_rule : Warning<
"property follows Cocoa naming"
" convention for returning 'owned' objects">,
InGroup<DiagGroup<"objc-property-matches-cocoa-ownership-rule">>;
def err_cocoa_naming_owned_rule : Error<
"property follows Cocoa naming"
" convention for returning 'owned' objects">;
def note_cocoa_naming_declare_family : Note<
"explicitly declare getter %objcinstance0 with '%1' to return an 'unowned' "
"object">;
def warn_auto_synthesizing_protocol_property :Warning<
"auto property synthesis will not synthesize property %0"
" declared in protocol %1">,
InGroup<DiagGroup<"objc-protocol-property-synthesis">>;
def note_add_synthesize_directive : Note<
"add a '@synthesize' directive">;
def warn_no_autosynthesis_shared_ivar_property : Warning <
"auto property synthesis will not synthesize property "
"%0 because it cannot share an ivar with another synthesized property">,
InGroup<ObjCNoPropertyAutoSynthesis>;
def warn_no_autosynthesis_property : Warning<
"auto property synthesis will not synthesize property "
"%0 because it is 'readwrite' but it will be synthesized 'readonly' "
"via another property">,
InGroup<ObjCNoPropertyAutoSynthesis>;
def warn_autosynthesis_property_in_superclass : Warning<
"auto property synthesis will not synthesize property "
"%0; it will be implemented by its superclass, use @dynamic to "
"acknowledge intention">,
InGroup<ObjCNoPropertyAutoSynthesis>;
def warn_autosynthesis_property_ivar_match :Warning<
"autosynthesized property %0 will use %select{|synthesized}1 instance variable "
"%2, not existing instance variable %3">,
InGroup<DiagGroup<"objc-autosynthesis-property-ivar-name-match">>;
def warn_missing_explicit_synthesis : Warning <
"auto property synthesis is synthesizing property not explicitly synthesized">,
InGroup<DiagGroup<"objc-missing-property-synthesis">>, DefaultIgnore;
def warn_property_getter_owning_mismatch : Warning<
"property declared as returning non-retained objects"
"; getter returning retained objects">;
def warn_property_redecl_getter_mismatch : Warning<
"getter name mismatch between property redeclaration (%1) and its original "
"declaration (%0)">, InGroup<PropertyAttr>;
def err_property_setter_ambiguous_use : Error<
"synthesized properties %0 and %1 both claim setter %2 -"
" use of this setter will cause unexpected behavior">;
def warn_default_atomic_custom_getter_setter : Warning<
"atomic by default property %0 has a user defined %select{getter|setter}1 "
"(property should be marked 'atomic' if this is intended)">,
InGroup<CustomAtomic>, DefaultIgnore;
def err_use_continuation_class : Error<
"illegal redeclaration of property in class extension %0"
" (attribute must be 'readwrite', while its primary must be 'readonly')">;
def err_type_mismatch_continuation_class : Error<
"type of property %0 in class extension does not match "
"property type in primary class">;
def err_use_continuation_class_redeclaration_readwrite : Error<
"illegal redeclaration of 'readwrite' property in class extension %0"
" (perhaps you intended this to be a 'readwrite' redeclaration of a "
"'readonly' public property?)">;
def err_continuation_class : Error<"class extension has no primary class">;
def err_property_type : Error<"property cannot have array or function type %0">;
def err_missing_property_context : Error<
"missing context for property implementation declaration">;
def err_bad_property_decl : Error<
"property implementation must have its declaration in interface %0 or one of "
"its extensions">;
def err_category_property : Error<
"property declared in category %0 cannot be implemented in "
"class implementation">;
def note_property_declare : Note<
"property declared here">;
def note_protocol_property_declare : Note<
"it could also be property "
"%select{of type %1|without attribute '%1'|with attribute '%1'|with getter "
"%1|with setter %1}0 declared here">;
def note_property_synthesize : Note<
"property synthesized here">;
def err_synthesize_category_decl : Error<
"@synthesize not allowed in a category's implementation">;
def err_synthesize_on_class_property : Error<
"@synthesize not allowed on a class property %0">;
def err_missing_property_interface : Error<
"property implementation in a category with no category declaration">;
def err_bad_category_property_decl : Error<
"property implementation must have its declaration in the category %0">;
def err_bad_property_context : Error<
"property implementation must be in a class or category implementation">;
def err_missing_property_ivar_decl : Error<
"synthesized property %0 must either be named the same as a compatible"
" instance variable or must explicitly name an instance variable">;
def err_arc_perform_selector_retains : Error<
"performSelector names a selector which retains the object">;
def warn_arc_perform_selector_leaks : Warning<
"performSelector may cause a leak because its selector is unknown">,
InGroup<DiagGroup<"arc-performSelector-leaks">>;
def warn_dealloc_in_category : Warning<
"-dealloc is being overridden in a category">,
InGroup<DeallocInCategory>;
def err_gc_weak_property_strong_type : Error<
"weak attribute declared on a __strong type property in GC mode">;
def warn_arc_repeated_use_of_weak : Warning <
"weak %select{variable|property|implicit property|instance variable}0 %1 is "
"accessed multiple times in this %select{function|method|block|lambda}2 "
"but may be unpredictably set to nil; assign to a strong variable to keep "
"the object alive">,
InGroup<ARCRepeatedUseOfWeak>, DefaultIgnore;
def warn_implicitly_retains_self : Warning <
"block implicitly retains 'self'; explicitly mention 'self' to indicate "
"this is intended behavior">,
InGroup<DiagGroup<"implicit-retain-self">>, DefaultIgnore;
def warn_arc_possible_repeated_use_of_weak : Warning <
"weak %select{variable|property|implicit property|instance variable}0 %1 may "
"be accessed multiple times in this %select{function|method|block|lambda}2 "
"and may be unpredictably set to nil; assign to a strong variable to keep "
"the object alive">,
InGroup<ARCRepeatedUseOfWeakMaybe>, DefaultIgnore;
def note_arc_weak_also_accessed_here : Note<
"also accessed here">;
def err_incomplete_synthesized_property : Error<
"cannot synthesize property %0 with incomplete type %1">;
def err_property_ivar_type : Error<
"type of property %0 (%1) does not match type of instance variable %2 (%3)">;
def err_property_accessor_type : Error<
"type of property %0 (%1) does not match type of accessor %2 (%3)">;
def err_ivar_in_superclass_use : Error<
"property %0 attempting to use instance variable %1 declared in super class %2">;
def err_weak_property : Error<
"existing instance variable %1 for __weak property %0 must be __weak">;
def err_strong_property : Error<
"existing instance variable %1 for strong property %0 may not be __weak">;
def err_dynamic_property_ivar_decl : Error<
"dynamic property cannot have instance variable specification">;
def err_duplicate_ivar_use : Error<
"synthesized properties %0 and %1 both claim instance variable %2">;
def err_property_implemented : Error<"property %0 is already implemented">;
def warn_objc_missing_super_call : Warning<
"method possibly missing a [super %0] call">,
InGroup<ObjCMissingSuperCalls>;
def err_dealloc_bad_result_type : Error<
"dealloc return type must be correctly specified as 'void' under ARC, "
"instead of %0">;
def warn_undeclared_selector : Warning<
"undeclared selector %0">, InGroup<UndeclaredSelector>, DefaultIgnore;
def warn_undeclared_selector_with_typo : Warning<
"undeclared selector %0; did you mean %1?">,
InGroup<UndeclaredSelector>, DefaultIgnore;
def warn_implicit_atomic_property : Warning<
"property is assumed atomic by default">, InGroup<ImplicitAtomic>, DefaultIgnore;
def note_auto_readonly_iboutlet_fixup_suggest : Note<
"property should be changed to be readwrite">;
def warn_auto_readonly_iboutlet_property : Warning<
"readonly IBOutlet property %0 when auto-synthesized may "
"not work correctly with 'nib' loader">,
InGroup<DiagGroup<"readonly-iboutlet-property">>;
def warn_auto_implicit_atomic_property : Warning<
"property is assumed atomic when auto-synthesizing the property">,
InGroup<ImplicitAtomic>, DefaultIgnore;
def warn_unimplemented_selector: Warning<
"no method with selector %0 is implemented in this translation unit">,
InGroup<Selector>, DefaultIgnore;
def warn_unimplemented_protocol_method : Warning<
"method %0 in protocol %1 not implemented">, InGroup<Protocol>;
def warn_multiple_selectors: Warning<
"several methods with selector %0 of mismatched types are found "
"for the @selector expression">,
InGroup<SelectorTypeMismatch>, DefaultIgnore;
def err_direct_selector_expression : Error<
"@selector expression formed with direct selector %0">;
def warn_potentially_direct_selector_expression : Warning<
"@selector expression formed with potentially direct selector %0">,
InGroup<ObjCPotentiallyDirectSelector>;
def warn_strict_potentially_direct_selector_expression : Warning<
warn_potentially_direct_selector_expression.Summary>,
InGroup<ObjCStrictPotentiallyDirectSelector>, DefaultIgnore;
def err_objc_kindof_nonobject : Error<
"'__kindof' specifier cannot be applied to non-object type %0">;
def err_objc_kindof_wrong_position : Error<
"'__kindof' type specifier must precede the declarator">;
def err_objc_method_unsupported_param_ret_type : Error<
"%0 %select{parameter|return}1 type is unsupported; "
"support for vector types for this target is introduced in %2">;
def warn_messaging_unqualified_id : Warning<
"messaging unqualified id">, DefaultIgnore,
InGroup<DiagGroup<"objc-messaging-id">>;
def err_messaging_unqualified_id_with_direct_method : Error<
"messaging unqualified id with a method that is possibly direct">;
def err_messaging_super_with_direct_method : Error<
"messaging super with a direct method">;
def err_messaging_class_with_direct_method : Error<
"messaging a Class with a method that is possibly direct">;
// C++ declarations
def err_static_assert_expression_is_not_constant : Error<
"static assertion expression is not an integral constant expression">;
def err_constexpr_if_condition_expression_is_not_constant : Error<
"constexpr if condition is not a constant expression">;
def err_static_assert_failed : Error<"static assertion failed%select{: %1|}0">;
def err_static_assert_requirement_failed : Error<
"static assertion failed due to requirement '%0'%select{: %2|}1">;
def note_expr_evaluates_to : Note<
"expression evaluates to '%0 %1 %2'">;
def warn_consteval_if_always_true : Warning<
"consteval if is always true in an %select{unevaluated|immediate}0 context">,
InGroup<DiagGroup<"redundant-consteval-if">>;
def ext_inline_variable : ExtWarn<
"inline variables are a C++17 extension">, InGroup<CXX17>;
def warn_cxx14_compat_inline_variable : Warning<
"inline variables are incompatible with C++ standards before C++17">,
DefaultIgnore, InGroup<CXXPre17Compat>;
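// Illustrative sketch (not part of the diagnostic table): an inline variable as
// it would appear in a header; pre-C++17 dialects diagnose it via
// ext_inline_variable, while C++17 and later accept it. 'active_sessions' is an
// invented name.
//
//   inline int active_sessions = 0;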
def warn_inline_namespace_reopened_noninline : Warning<
"inline namespace reopened as a non-inline namespace">,
InGroup<InlineNamespaceReopenedNoninline>;
def err_inline_namespace_mismatch : Error<
"non-inline namespace cannot be reopened as inline">;
def err_unexpected_friend : Error<
"friends can only be classes or functions">;
def ext_enum_friend : ExtWarn<
"befriending enumeration type %0 is a C++11 extension">, InGroup<CXX11>;
def warn_cxx98_compat_enum_friend : Warning<
"befriending enumeration type %0 is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def ext_nonclass_type_friend : ExtWarn<
"non-class friend type %0 is a C++11 extension">, InGroup<CXX11>;
def warn_cxx98_compat_nonclass_type_friend : Warning<
"non-class friend type %0 is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_friend_is_member : Error<
"friends cannot be members of the declaring class">;
def warn_cxx98_compat_friend_is_member : Warning<
"friend declaration naming a member of the declaring class is incompatible "
"with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
def ext_unelaborated_friend_type : ExtWarn<
"unelaborated friend declaration is a C++11 extension; specify "
"'%select{struct|interface|union|class|enum}0' to befriend %1">,
InGroup<CXX11>;
def warn_cxx98_compat_unelaborated_friend_type : Warning<
"befriending %1 without '%select{struct|interface|union|class|enum}0' "
"keyword is incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
def err_qualified_friend_no_match : Error<
"friend declaration of %0 does not match any declaration in %1">;
def err_introducing_special_friend : Error<
"%plural{[0,2]:must use a qualified name when declaring|3:cannot declare}0"
" a %select{constructor|destructor|conversion operator|deduction guide}0 "
"as a friend">;
def err_tagless_friend_type_template : Error<
"friend type templates must use an elaborated type">;
def err_no_matching_local_friend : Error<
"no matching function found in local scope">;
def err_no_matching_local_friend_suggest : Error<
"no matching function %0 found in local scope; did you mean %3?">;
def err_partial_specialization_friend : Error<
"partial specialization cannot be declared as a friend">;
def err_qualified_friend_def : Error<
"friend function definition cannot be qualified with '%0'">;
def err_friend_def_in_local_class : Error<
"friend function cannot be defined in a local class">;
def err_friend_not_first_in_declaration : Error<
"'friend' must appear first in a non-function declaration">;
def err_using_decl_friend : Error<
"cannot befriend target of using declaration">;
def warn_template_qualified_friend_unsupported : Warning<
"dependent nested name specifier '%0' for friend class declaration is "
"not supported; turning off access control for %1">,
InGroup<UnsupportedFriend>;
def warn_template_qualified_friend_ignored : Warning<
"dependent nested name specifier '%0' for friend template declaration is "
"not supported; ignoring this friend declaration">,
InGroup<UnsupportedFriend>;
def ext_friend_tag_redecl_outside_namespace : ExtWarn<
"unqualified friend declaration referring to type outside of the nearest "
"enclosing namespace is a Microsoft extension; add a nested name specifier">,
InGroup<MicrosoftUnqualifiedFriend>;
def err_pure_friend : Error<"friend declaration cannot have a pure-specifier">;
def err_invalid_base_in_interface : Error<
"interface type cannot inherit from "
"%select{struct|non-public interface|class}0 %1">;
def err_abstract_type_in_decl : Error<
"%select{return|parameter|variable|field|instance variable|"
"synthesized instance variable}0 type %1 is an abstract class">;
def err_allocation_of_abstract_type : Error<
"allocating an object of abstract class type %0">;
def err_throw_abstract_type : Error<
"cannot throw an object of abstract type %0">;
def err_array_of_abstract_type : Error<"array of abstract class type %0">;
def err_capture_of_abstract_type : Error<
"by-copy capture of value of abstract type %0">;
def err_capture_of_incomplete_or_sizeless_type : Error<
"by-copy capture of variable %0 with %select{incomplete|sizeless}1 type %2">;
def err_capture_default_non_local : Error<
"non-local lambda expression cannot have a capture-default">;
def err_multiple_final_overriders : Error<
"virtual function %q0 has more than one final overrider in %1">;
def note_final_overrider : Note<"final overrider of %q0 in %1">;
def err_type_defined_in_type_specifier : Error<
"%0 cannot be defined in a type specifier">;
def err_type_defined_in_result_type : Error<
"%0 cannot be defined in the result type of a function">;
def err_type_defined_in_param_type : Error<
"%0 cannot be defined in a parameter type">;
def err_type_defined_in_alias_template : Error<
"%0 cannot be defined in a type alias template">;
def err_type_defined_in_condition : Error<
"%0 cannot be defined in a condition">;
def err_type_defined_in_enum : Error<
"%0 cannot be defined in an enumeration">;
def ext_type_defined_in_offsetof : Extension<
"defining a type within '%select{__builtin_offsetof|offsetof}0' is a Clang "
"extension">, InGroup<GNUOffsetofExtensions>;
def note_pure_virtual_function : Note<
"unimplemented pure virtual method %0 in %1">;
def note_pure_qualified_call_kext : Note<
"qualified call to %0::%1 is treated as a virtual call to %1 due to -fapple-kext">;
def err_deleted_decl_not_first : Error<
"deleted definition must be first declaration">;
def err_deleted_override : Error<
"deleted function %0 cannot override a non-deleted function">;
def err_non_deleted_override : Error<
"non-deleted function %0 cannot override a deleted function">;
def err_consteval_override : Error<
"consteval function %0 cannot override a non-consteval function">;
def err_non_consteval_override : Error<
"non-consteval function %0 cannot override a consteval function">;
def warn_weak_vtable : Warning<
"%0 has no out-of-line virtual method definitions; its vtable will be "
"emitted in every translation unit">,
InGroup<DiagGroup<"weak-vtables">>, DefaultIgnore;
def warn_weak_template_vtable : Warning<
"this warning is no longer in use and will be removed in the next release">,
InGroup<DiagGroup<"weak-template-vtables">>, DefaultIgnore;
def ext_using_undefined_std : ExtWarn<
"using directive refers to implicitly-defined namespace 'std'">;
// C++ exception specifications
def err_exception_spec_in_typedef : Error<
"exception specifications are not allowed in %select{typedefs|type aliases}0">;
def err_distant_exception_spec : Error<
"exception specifications are not allowed beyond a single level "
"of indirection">;
def err_incomplete_in_exception_spec : Error<
"%select{|pointer to |reference to }0incomplete type %1 is not allowed "
"in exception specification">;
def err_sizeless_in_exception_spec : Error<
"%select{|reference to }0sizeless type %1 is not allowed "
"in exception specification">;
def ext_incomplete_in_exception_spec : ExtWarn<err_incomplete_in_exception_spec.Summary>,
InGroup<MicrosoftExceptionSpec>;
def err_rref_in_exception_spec : Error<
"rvalue reference type %0 is not allowed in exception specification">;
def err_mismatched_exception_spec : Error<
"exception specification in declaration does not match previous declaration">;
def ext_mismatched_exception_spec : ExtWarn<err_mismatched_exception_spec.Summary>,
InGroup<MicrosoftExceptionSpec>;
def err_override_exception_spec : Error<
"exception specification of overriding function is more lax than "
"base version">;
def ext_override_exception_spec : ExtWarn<err_override_exception_spec.Summary>,
InGroup<MicrosoftExceptionSpec>;
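// Illustrative sketch (not part of the diagnostic table): an override whose
// exception specification is laxer than the base version, which
// err_override_exception_spec rejects. 'Task' and 'Deferred' are invented names.
//
//   struct Task { virtual void run() noexcept; };
//   struct Deferred : Task {
//     void run() override;   // may throw, so it is laxer than the noexcept base
//   };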
def err_incompatible_exception_specs : Error<
"target exception specification is not superset of source">;
def warn_incompatible_exception_specs : Warning<
err_incompatible_exception_specs.Summary>, InGroup<IncompatibleExceptionSpec>;
def err_deep_exception_specs_differ : Error<
"exception specifications of %select{return|argument}0 types differ">;
def warn_deep_exception_specs_differ : Warning<
err_deep_exception_specs_differ.Summary>, InGroup<IncompatibleExceptionSpec>;
def err_missing_exception_specification : Error<
"%0 is missing exception specification '%1'">;
def ext_missing_exception_specification : ExtWarn<
err_missing_exception_specification.Summary>,
InGroup<DiagGroup<"missing-exception-spec">>;
def err_exception_spec_not_parsed : Error<
"exception specification is not available until end of class definition">;
def err_exception_spec_cycle : Error<
"exception specification of %0 uses itself">;
def err_exception_spec_incomplete_type : Error<
"exception specification needed for member of incomplete class %0">;
def warn_wasm_dynamic_exception_spec_ignored : ExtWarn<
"dynamic exception specifications with types are currently ignored in wasm">,
InGroup<WebAssemblyExceptionSpec>;
// C++ access checking
def err_class_redeclared_with_different_access : Error<
"%0 redeclared with '%1' access">;
def err_access : Error<
"%1 is a %select{private|protected}0 member of %3">, AccessControl;
def ext_ms_using_declaration_inaccessible : ExtWarn<
"using declaration referring to inaccessible member '%0' (which refers "
"to accessible member '%1') is a Microsoft compatibility extension">,
AccessControl, InGroup<MicrosoftUsingDecl>;
def err_access_ctor : Error<
"calling a %select{private|protected}0 constructor of class %2">,
AccessControl;
def ext_rvalue_to_reference_access_ctor : Extension<
"C++98 requires an accessible copy constructor for class %2 when binding "
"a reference to a temporary; was %select{private|protected}0">,
AccessControl, InGroup<BindToTemporaryCopy>;
def err_access_base_ctor : Error<
// The ERRORs represent other special members that aren't constructors, in
// hopes that someone will bother noticing and reporting if they appear
"%select{base class|inherited virtual base class}0 %1 has %select{private|"
"protected}3 %select{default |copy |move |*ERROR* |*ERROR* "
"|*ERROR*|}2constructor">, AccessControl;
def err_access_field_ctor : Error<
// The ERRORs represent other special members that aren't constructors, in
// hopes that someone will bother noticing and reporting if they appear
"field of type %0 has %select{private|protected}2 "
"%select{default |copy |move |*ERROR* |*ERROR* |*ERROR* |}1constructor">,
AccessControl;
def err_access_friend_function : Error<
"friend function %1 is a %select{private|protected}0 member of %3">,
AccessControl;
def err_access_dtor : Error<
"calling a %select{private|protected}1 destructor of class %0">,
AccessControl;
def err_access_dtor_base :
Error<"base class %0 has %select{private|protected}1 destructor">,
AccessControl;
def err_access_dtor_vbase :
Error<"inherited virtual base class %1 has "
"%select{private|protected}2 destructor">,
AccessControl;
def err_access_dtor_temp :
Error<"temporary of type %0 has %select{private|protected}1 destructor">,
AccessControl;
def err_access_dtor_exception :
Error<"exception object of type %0 has %select{private|protected}1 "
"destructor">, AccessControl;
def err_access_dtor_field :
Error<"field of type %1 has %select{private|protected}2 destructor">,
AccessControl;
def err_access_dtor_var :
Error<"variable of type %1 has %select{private|protected}2 destructor">,
AccessControl;
def err_access_dtor_ivar :
Error<"instance variable of type %0 has %select{private|protected}1 "
"destructor">,
AccessControl;
def note_previous_access_declaration : Note<
"previously declared '%1' here">;
def note_access_natural : Note<
"%select{|implicitly }1declared %select{private|protected}0 here">;
def note_access_constrained_by_path : Note<
"constrained by %select{|implicitly }1%select{private|protected}0"
" inheritance here">;
def note_access_protected_restricted_noobject : Note<
"must name member using the type of the current context %0">;
def note_access_protected_restricted_ctordtor : Note<
"protected %select{constructor|destructor}0 can only be used to "
"%select{construct|destroy}0 a base class subobject">;
def note_access_protected_restricted_object : Note<
"can only access this member on an object of type %0">;
def warn_cxx98_compat_sfinae_access_control : Warning<
"substitution failure due to access control is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore, NoSFINAE;
// C++ name lookup
def err_incomplete_nested_name_spec : Error<
"incomplete type %0 named in nested name specifier">;
def err_incomplete_enum : Error<
"enumeration %0 is incomplete">;
def err_dependent_nested_name_spec : Error<
"nested name specifier for a declaration cannot depend on a template "
"parameter">;
def err_nested_name_member_ref_lookup_ambiguous : Error<
"lookup of %0 in member access expression is ambiguous">;
def ext_nested_name_member_ref_lookup_ambiguous : ExtWarn<
"lookup of %0 in member access expression is ambiguous; using member of %1">,
InGroup<AmbigMemberTemplate>;
def note_ambig_member_ref_object_type : Note<
"lookup in the object type %0 refers here">;
def note_ambig_member_ref_scope : Note<
"lookup from the current scope refers here">;
def err_qualified_member_nonclass : Error<
"qualified member access refers to a member in %0">;
def err_incomplete_member_access : Error<
"member access into incomplete type %0">;
def err_incomplete_type : Error<
"incomplete type %0 where a complete type is required">;
def warn_cxx98_compat_enum_nested_name_spec : Warning<
"enumeration type in nested name specifier is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_nested_name_spec_is_not_class : Error<
"%0 cannot appear before '::' because it is not a class"
"%select{ or namespace|, namespace, or enumeration}1; did you mean ':'?">;
def ext_nested_name_spec_is_enum : ExtWarn<
"use of enumeration in a nested name specifier is a C++11 extension">,
InGroup<CXX11>;
def err_out_of_line_qualified_id_type_names_constructor : Error<
"qualified reference to %0 is a constructor name rather than a "
"%select{template name|type}1 in this context">;
def ext_out_of_line_qualified_id_type_names_constructor : ExtWarn<
"ISO C++ specifies that "
"qualified reference to %0 is a constructor name rather than a "
"%select{template name|type}1 in this context, despite preceding "
"%select{'typename'|'template'}2 keyword">, SFINAEFailure,
InGroup<DiagGroup<"injected-class-name">>;
// C++ class members
def err_storageclass_invalid_for_member : Error<
"storage class specified for a member declaration">;
def err_mutable_function : Error<"'mutable' cannot be applied to functions">;
def err_mutable_reference : Error<"'mutable' cannot be applied to references">;
def ext_mutable_reference : ExtWarn<
"'mutable' on a reference type is a Microsoft extension">,
InGroup<MicrosoftMutableReference>;
def err_mutable_const : Error<"'mutable' and 'const' cannot be mixed">;
def err_mutable_nonmember : Error<
"'mutable' can only be applied to member variables">;
def err_virtual_in_union : Error<
"unions cannot have virtual functions">;
def err_virtual_non_function : Error<
"'virtual' can only appear on non-static member functions">;
def err_virtual_out_of_class : Error<
"'virtual' can only be specified inside the class definition">;
def err_virtual_member_function_template : Error<
"'virtual' cannot be specified on member function templates">;
def err_static_overrides_virtual : Error<
"'static' member function %0 overrides a virtual function in a base class">;
def err_explicit_non_function : Error<
"'explicit' can only appear on non-static member functions">;
def err_explicit_out_of_class : Error<
"'explicit' can only be specified inside the class definition">;
def err_explicit_non_ctor_or_conv_function : Error<
"'explicit' can only be applied to a constructor or conversion function">;
def err_static_not_bitfield : Error<"static member %0 cannot be a bit-field">;
def err_static_out_of_line : Error<
"'static' can only be specified inside the class definition">;
def ext_static_out_of_line : ExtWarn<
err_static_out_of_line.Summary>,
InGroup<MicrosoftTemplate>;
def err_storage_class_for_static_member : Error<
"static data member definition cannot specify a storage class">;
def err_typedef_not_bitfield : Error<"typedef member %0 cannot be a bit-field">;
def err_not_integral_type_bitfield : Error<
"bit-field %0 has non-integral type %1">;
def err_not_integral_type_anon_bitfield : Error<
"anonymous bit-field has non-integral type %0">;
def err_anon_bitfield_qualifiers : Error<
"anonymous bit-field cannot have qualifiers">;
def err_member_function_initialization : Error<
"initializer on function does not look like a pure-specifier">;
def err_non_virtual_pure : Error<
"%0 is not virtual and cannot be declared pure">;
def ext_pure_function_definition : ExtWarn<
"function definition with pure-specifier is a Microsoft extension">,
InGroup<MicrosoftPureDefinition>;
def err_qualified_member_of_unrelated : Error<
"%q0 is not a member of class %1">;
def err_member_function_call_bad_cvr : Error<
"'this' argument to member function %0 has type %1, but function is not marked "
"%select{const|restrict|const or restrict|volatile|const or volatile|"
"volatile or restrict|const, volatile, or restrict}2">;
def err_member_function_call_bad_ref : Error<
"'this' argument to member function %0 is an %select{lvalue|rvalue}1, "
"but function has %select{non-const lvalue|rvalue}2 ref-qualifier">;
def err_member_function_call_bad_type : Error<
"cannot initialize object parameter of type %0 with an expression "
"of type %1">;
def warn_call_to_pure_virtual_member_function_from_ctor_dtor : Warning<
"call to pure virtual member function %0 has undefined behavior; "
"overrides of %0 in subclasses are not available in the "
"%select{constructor|destructor}1 of %2">, InGroup<PureVirtualCallFromCtorDtor>;
def select_special_member_kind : TextSubstitution<
"%select{default constructor|copy constructor|move constructor|"
"copy assignment operator|move assignment operator|destructor}0">;
def note_member_declared_at : Note<"member is declared here">;
def note_ivar_decl : Note<"instance variable is declared here">;
def note_bitfield_decl : Note<"bit-field is declared here">;
def note_implicit_param_decl : Note<"%0 is an implicit parameter">;
def note_member_synthesized_at : Note<
"in %select{implicit|defaulted}0 %sub{select_special_member_kind}1 for %2 "
"first required here">;
def note_comparison_synthesized_at : Note<
"in defaulted %sub{select_defaulted_comparison_kind}0 for %1 "
"first required here">;
def err_missing_default_ctor : Error<
"%select{constructor for %1 must explicitly initialize the|"
"implicit default constructor for %1 must explicitly initialize the|"
"cannot use constructor inherited from base class %4;}0 "
"%select{base class|member}2 %3 %select{which|which|of %1}0 "
"does not have a default constructor">;
def note_due_to_dllexported_class : Note<
"due to %0 being dllexported%select{|; try compiling in C++11 mode}1">;
def err_illegal_union_or_anon_struct_member : Error<
"%select{anonymous struct|union}0 member %1 has a non-trivial "
"%sub{select_special_member_kind}2">;
def warn_frame_address : Warning<
"calling '%0' with a nonzero argument is unsafe">,
InGroup<FrameAddress>, DefaultIgnore;
def warn_cxx98_compat_nontrivial_union_or_anon_struct_member : Warning<
"%select{anonymous struct|union}0 member %1 with a non-trivial "
"%sub{select_special_member_kind}2 is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def note_nontrivial_virtual_dtor : Note<
"destructor for %0 is not trivial because it is virtual">;
def note_nontrivial_has_virtual : Note<
"because type %0 has a virtual %select{member function|base class}1">;
def note_nontrivial_no_def_ctor : Note<
"because %select{base class of |field of |}0type %1 has no "
"default constructor">;
def note_user_declared_ctor : Note<
"implicit default constructor suppressed by user-declared constructor">;
def note_nontrivial_no_copy : Note<
"because no %select{<<ERROR>>|constructor|constructor|assignment operator|"
"assignment operator|<<ERROR>>}2 can be used to "
"%select{<<ERROR>>|copy|move|copy|move|<<ERROR>>}2 "
"%select{base class|field|an object}0 of type %3">;
def note_nontrivial_user_provided : Note<
"because %select{base class of |field of |}0type %1 has a user-provided "
"%sub{select_special_member_kind}2">;
def note_nontrivial_default_member_init : Note<
"because field %0 has an initializer">;
def note_nontrivial_param_type : Note<
"because its parameter is %diff{of type $, not $|of the wrong type}2,3">;
def note_nontrivial_default_arg : Note<"because it has a default argument">;
def note_nontrivial_variadic : Note<"because it is a variadic function">;
def note_nontrivial_subobject : Note<
"because the function selected to %select{construct|copy|move|copy|move|"
"destroy}2 %select{base class|field}0 of type %1 is not trivial">;
def note_nontrivial_objc_ownership : Note<
"because type %0 has a member with %select{no|no|__strong|__weak|"
"__autoreleasing}1 ownership">;
/// Selector for a TagTypeKind value.
def select_tag_type_kind : TextSubstitution<
"%select{struct|interface|union|class|enum}0">;
def err_static_data_member_not_allowed_in_anon_struct : Error<
"static data member %0 not allowed in anonymous "
"%sub{select_tag_type_kind}1">;
def ext_static_data_member_in_union : ExtWarn<
"static data member %0 in union is a C++11 extension">, InGroup<CXX11>;
def warn_cxx98_compat_static_data_member_in_union : Warning<
"static data member %0 in union is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def ext_union_member_of_reference_type : ExtWarn<
"union member %0 has reference type %1, which is a Microsoft extension">,
InGroup<MicrosoftUnionMemberReference>;
def err_union_member_of_reference_type : Error<
"union member %0 has reference type %1">;
def ext_anonymous_struct_union_qualified : Extension<
"anonymous %select{struct|union}0 cannot be '%1'">;
def err_different_return_type_for_overriding_virtual_function : Error<
"virtual function %0 has a different return type "
"%diff{($) than the function it overrides (which has return type $)|"
"than the function it overrides}1,2">;
def note_overridden_virtual_function : Note<
"overridden virtual function is here">;
def err_conflicting_overriding_cc_attributes : Error<
"virtual function %0 has different calling convention attributes "
"%diff{($) than the function it overrides (which has calling convention $)|"
"than the function it overrides}1,2">;
def warn_overriding_method_missing_noescape : Warning<
"parameter of overriding method should be annotated with "
"__attribute__((noescape))">, InGroup<MissingNoEscape>;
def note_overridden_marked_noescape : Note<
"parameter of overridden method is annotated with __attribute__((noescape))">;
def note_cat_conform_to_noescape_prot : Note<
"%select{category|class extension}0 conforms to protocol %1 which defines method %2">;
def err_covariant_return_inaccessible_base : Error<
"invalid covariant return for virtual function: %1 is a "
"%select{private|protected}2 base class of %0">, AccessControl;
def err_covariant_return_ambiguous_derived_to_base_conv : Error<
"return type of virtual function %3 is not covariant with the return type of "
"the function it overrides (ambiguous conversion from derived class "
"%0 to base class %1:%2)">;
def err_covariant_return_not_derived : Error<
"return type of virtual function %0 is not covariant with the return type of "
"the function it overrides (%1 is not derived from %2)">;
def err_covariant_return_incomplete : Error<
"return type of virtual function %0 is not covariant with the return type of "
"the function it overrides (%1 is incomplete)">;
def err_covariant_return_type_different_qualifications : Error<
"return type of virtual function %0 is not covariant with the return type of "
"the function it overrides (%1 has different qualifiers than %2)">;
def err_covariant_return_type_class_type_more_qualified : Error<
"return type of virtual function %0 is not covariant with the return type of "
"the function it overrides (class type %1 is more qualified than class "
"type %2">;
// C++ implicit special member functions
def note_in_declaration_of_implicit_special_member : Note<
"while declaring the implicit %sub{select_special_member_kind}1"
" for %0">;
// C++ constructors
def err_constructor_cannot_be : Error<"constructor cannot be declared '%0'">;
def err_invalid_qualified_constructor : Error<
"'%0' qualifier is not allowed on a constructor">;
def err_ref_qualifier_constructor : Error<
"ref-qualifier '%select{&&|&}0' is not allowed on a constructor">;
def err_constructor_return_type : Error<
"constructor cannot have a return type">;
def err_constructor_redeclared : Error<"constructor cannot be redeclared">;
def err_constructor_byvalue_arg : Error<
"copy constructor must pass its first argument by reference">;
def warn_no_constructor_for_refconst : Warning<
"%select{struct|interface|union|class|enum}0 %1 does not declare any "
"constructor to initialize its non-modifiable members">;
def note_refconst_member_not_initialized : Note<
"%select{const|reference}0 member %1 will never be initialized">;
def ext_ms_explicit_constructor_call : ExtWarn<
"explicit constructor calls are a Microsoft extension">,
InGroup<MicrosoftExplicitConstructorCall>;
// C++ destructors
def err_destructor_not_member : Error<
"destructor must be a non-static member function">;
def err_destructor_cannot_be : Error<"destructor cannot be declared '%0'">;
def err_invalid_qualified_destructor : Error<
"'%0' qualifier is not allowed on a destructor">;
def err_ref_qualifier_destructor : Error<
"ref-qualifier '%select{&&|&}0' is not allowed on a destructor">;
def err_destructor_return_type : Error<"destructor cannot have a return type">;
def err_destructor_redeclared : Error<"destructor cannot be redeclared">;
def err_destructor_with_params : Error<"destructor cannot have any parameters">;
def err_destructor_variadic : Error<"destructor cannot be variadic">;
def ext_destructor_typedef_name : ExtWarn<
"destructor cannot be declared using a %select{typedef|type alias}1 %0 "
"of the class name">, DefaultError, InGroup<DiagGroup<"dtor-typedef">>;
def err_undeclared_destructor_name : Error<
"undeclared identifier %0 in destructor name">;
def err_destructor_name : Error<
"expected the class name after '~' to name the enclosing class">;
def err_destructor_name_nontype : Error<
"identifier %0 after '~' in destructor name does not name a type">;
def err_destructor_expr_mismatch : Error<
"identifier %0 in object destruction expression does not name the type "
"%1 of the object being destroyed">;
def err_destructor_expr_nontype : Error<
"identifier %0 in object destruction expression does not name a type">;
def err_destructor_expr_type_mismatch : Error<
"destructor type %0 in object destruction expression does not match the "
"type %1 of the object being destroyed">;
def note_destructor_type_here : Note<
"type %0 found by destructor name lookup">;
def note_destructor_nontype_here : Note<
"non-type declaration found by destructor name lookup">;
def ext_dtor_named_in_wrong_scope : Extension<
"ISO C++ requires the name after '::~' to be found in the same scope as "
"the name before '::~'">, InGroup<DtorName>;
def ext_qualified_dtor_named_in_lexical_scope : ExtWarn<
"qualified destructor name only found in lexical scope; omit the qualifier "
"to find this type name by unqualified lookup">, InGroup<DtorName>;
def ext_dtor_name_ambiguous : Extension<
"ISO C++ considers this destructor name lookup to be ambiguous">,
InGroup<DtorName>;
def err_destroy_attr_on_non_static_var : Error<
"%select{no_destroy|always_destroy}0 attribute can only be applied to a"
" variable with static or thread storage duration">;
def err_destructor_template : Error<
"destructor cannot be declared as a template">;
// C++ initialization
def err_init_conversion_failed : Error<
"cannot initialize %select{a variable|a parameter|template parameter|"
"return object|statement expression result|an "
"exception object|a member subobject|an array element|a new value|a value|a "
"base class|a constructor delegation|a vector element|a block element|a "
"block element|a complex element|a lambda capture|a compound literal "
- "initializer|a related result|a parameter of CF audited function}0 "
+ "initializer|a related result|a parameter of CF audited function|a "
+ "structured binding|a member subobject}0 "
"%diff{of type $ with an %select{rvalue|lvalue}2 of type $|"
"with an %select{rvalue|lvalue}2 of incompatible type}1,3"
"%select{|: different classes%diff{ ($ vs $)|}5,6"
"|: different number of parameters (%5 vs %6)"
"|: type mismatch at %ordinal5 parameter%diff{ ($ vs $)|}6,7"
"|: different return type%diff{ ($ vs $)|}5,6"
"|: different qualifiers (%5 vs %6)"
"|: different exception specifications}4">;
def note_forward_class_conversion : Note<"%0 is not defined, but forward "
"declared here; conversion would be valid if it was derived from %1">;
def err_lvalue_to_rvalue_ref : Error<"rvalue reference %diff{to type $ cannot "
"bind to lvalue of type $|cannot bind to incompatible lvalue}0,1">;
def err_lvalue_reference_bind_to_initlist : Error<
"%select{non-const|volatile}0 lvalue reference to type %1 cannot bind to an "
"initializer list temporary">;
def err_lvalue_reference_bind_to_temporary : Error<
"%select{non-const|volatile}0 lvalue reference %diff{to type $ cannot bind "
"to a temporary of type $|cannot bind to incompatible temporary}1,2">;
def err_lvalue_reference_bind_to_unrelated : Error<
"%select{non-const|volatile}0 lvalue reference "
"%diff{to type $ cannot bind to a value of unrelated type $|"
"cannot bind to a value of unrelated type}1,2">;
def err_reference_bind_drops_quals : Error<
"binding reference %diff{of type $ to value of type $|to value}0,1 "
"%select{drops %3 qualifier%plural{1:|2:|4:|:s}4|changes address space|"
"not permitted due to incompatible qualifiers}2">;
def err_reference_bind_failed : Error<
"reference %diff{to %select{type|incomplete type}1 $ could not bind to an "
"%select{rvalue|lvalue}2 of type $|could not bind to %select{rvalue|lvalue}2 of "
"incompatible type}0,3">;
def err_reference_bind_temporary_addrspace : Error<
"reference of type %0 cannot bind to a temporary object because of "
"address space mismatch">;
def err_reference_bind_init_list : Error<
"reference to type %0 cannot bind to an initializer list">;
def err_init_list_bad_dest_type : Error<
"%select{|non-aggregate }0type %1 cannot be initialized with an initializer "
"list">;
def warn_cxx20_compat_aggregate_init_with_ctors : Warning<
"aggregate initialization of type %0 with user-declared constructors "
"is incompatible with C++20">, DefaultIgnore, InGroup<CXX20Compat>;
def warn_cxx17_compat_aggregate_init_paren_list : Warning<
"aggregate initialization of type %0 from a parenthesized list of values "
"is a C++20 extension">, DefaultIgnore, InGroup<CXX20>;
def err_reference_bind_to_bitfield : Error<
"%select{non-const|volatile}0 reference cannot bind to "
"bit-field%select{| %1}2">;
def err_reference_bind_to_vector_element : Error<
"%select{non-const|volatile}0 reference cannot bind to vector element">;
def err_reference_bind_to_matrix_element : Error<
"%select{non-const|volatile}0 reference cannot bind to matrix element">;
def err_reference_var_requires_init : Error<
"declaration of reference variable %0 requires an initializer">;
def err_reference_without_init : Error<
"reference to type %0 requires an initializer">;
def note_value_initialization_here : Note<
"in value-initialization of type %0 here">;
def err_reference_has_multiple_inits : Error<
"reference cannot be initialized with multiple values">;
def err_init_non_aggr_init_list : Error<
"initialization of non-aggregate type %0 with an initializer list">;
def err_init_reference_member_uninitialized : Error<
"reference member of type %0 uninitialized">;
def note_uninit_reference_member : Note<
"uninitialized reference member is here">;
def warn_field_is_uninit : Warning<"field %0 is uninitialized when used here">,
InGroup<Uninitialized>;
def warn_base_class_is_uninit : Warning<
"base class %0 is uninitialized when used here to access %q1">,
InGroup<Uninitialized>;
def warn_reference_field_is_uninit : Warning<
"reference %0 is not yet bound to a value when used here">,
InGroup<Uninitialized>;
def note_uninit_in_this_constructor : Note<
"during field initialization in %select{this|the implicit default}0 "
"constructor">;
def warn_static_self_reference_in_init : Warning<
"static variable %0 is suspiciously used within its own initialization">,
InGroup<UninitializedStaticSelfInit>;
def warn_uninit_self_reference_in_init : Warning<
"variable %0 is uninitialized when used within its own initialization">,
InGroup<Uninitialized>;
def warn_uninit_self_reference_in_reference_init : Warning<
"reference %0 is not yet bound to a value when used within its own"
" initialization">,
InGroup<Uninitialized>;
def warn_uninit_var : Warning<
"variable %0 is uninitialized when %select{used here|captured by block}1">,
InGroup<Uninitialized>, DefaultIgnore;
def warn_sometimes_uninit_var : Warning<
"variable %0 is %select{used|captured}1 uninitialized whenever "
"%select{'%3' condition is %select{true|false}4|"
"'%3' loop %select{is entered|exits because its condition is false}4|"
"'%3' loop %select{condition is true|exits because its condition is false}4|"
"switch %3 is taken|"
"its declaration is reached|"
"%3 is called}2">,
InGroup<UninitializedSometimes>, DefaultIgnore;
def warn_maybe_uninit_var : Warning<
"variable %0 may be uninitialized when "
"%select{used here|captured by block}1">,
InGroup<UninitializedMaybe>, DefaultIgnore;
def note_var_declared_here : Note<"variable %0 is declared here">;
def note_uninit_var_use : Note<
"%select{uninitialized use occurs|variable is captured by block}0 here">;
def warn_uninit_byref_blockvar_captured_by_block : Warning<
"block pointer variable %0 is %select{uninitialized|null}1 when captured by "
"block">, InGroup<Uninitialized>, DefaultIgnore;
def note_block_var_fixit_add_initialization : Note<
"did you mean to use __block %0?">;
def note_in_omitted_aggregate_initializer : Note<
"in implicit initialization of %select{"
"array element %1 with omitted initializer|"
"field %1 with omitted initializer|"
"trailing array elements in runtime-sized array new}0">;
def note_in_reference_temporary_list_initializer : Note<
"in initialization of temporary of type %0 created to "
"list-initialize this reference">;
def note_var_fixit_add_initialization : Note<
"initialize the variable %0 to silence this warning">;
def note_uninit_fixit_remove_cond : Note<
"remove the %select{'%1' if its condition|condition if it}0 "
"is always %select{false|true}2">;
def err_init_incomplete_type : Error<"initialization of incomplete type %0">;
def err_list_init_in_parens : Error<
"cannot initialize %select{non-class|reference}0 type %1 with a "
"parenthesized initializer list">;
def warn_uninit_const_reference : Warning<
"variable %0 is uninitialized when passed as a const reference argument "
"here">, InGroup<UninitializedConstReference>, DefaultIgnore;
def warn_unsequenced_mod_mod : Warning<
"multiple unsequenced modifications to %0">, InGroup<Unsequenced>;
def warn_unsequenced_mod_use : Warning<
"unsequenced modification and access to %0">, InGroup<Unsequenced>;
def select_initialized_entity_kind : TextSubstitution<
"%select{copying variable|copying parameter|initializing template parameter|"
"returning object|initializing statement expression result|"
"throwing object|copying member subobject|copying array element|"
"allocating object|copying temporary|initializing base subobject|"
"initializing vector element|capturing value}0">;
def err_temp_copy_no_viable : Error<
"no viable constructor %sub{select_initialized_entity_kind}0 of type %1">;
def ext_rvalue_to_reference_temp_copy_no_viable : Extension<
"no viable constructor %sub{select_initialized_entity_kind}0 of type %1; "
"C++98 requires a copy constructor when binding a reference to a temporary">,
InGroup<BindToTemporaryCopy>;
def err_temp_copy_ambiguous : Error<
"ambiguous constructor call when %sub{select_initialized_entity_kind}0 "
"of type %1">;
def err_temp_copy_deleted : Error<
"%sub{select_initialized_entity_kind}0 of type %1 "
"invokes deleted constructor">;
def err_temp_copy_incomplete : Error<
"copying a temporary object of incomplete type %0">;
def warn_cxx98_compat_temp_copy : Warning<
"%sub{select_initialized_entity_kind}1 "
"of type %2 when binding a reference to a temporary would %select{invoke "
"an inaccessible constructor|find no viable constructor|find ambiguous "
"constructors|invoke a deleted constructor}0 in C++98">,
InGroup<CXX98CompatBindToTemporaryCopy>, DefaultIgnore;
def err_selected_explicit_constructor : Error<
"chosen constructor is explicit in copy-initialization">;
def note_explicit_ctor_deduction_guide_here : Note<
"explicit %select{constructor|deduction guide}0 declared here">;
// C++11 decltype
def err_decltype_in_declarator : Error<
"'decltype' cannot be used to name a declaration">;
// C++11 auto
def warn_cxx98_compat_auto_type_specifier : Warning<
"'auto' type specifier is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_auto_variable_cannot_appear_in_own_initializer : Error<
"variable %0 declared with deduced type %1 "
"cannot appear in its own initializer">;
def err_binding_cannot_appear_in_own_initializer : Error<
"binding %0 cannot appear in the initializer of its own "
"decomposition declaration">;
def err_illegal_decl_array_of_auto : Error<
"'%0' declared as array of %1">;
def err_new_array_of_auto : Error<
"cannot allocate array of 'auto'">;
def err_auto_not_allowed : Error<
"%select{'auto'|'decltype(auto)'|'__auto_type'|"
"use of "
"%select{class template|function template|variable template|alias template|"
"template template parameter|concept|template}2 %3 requires template "
"arguments; argument deduction}0 not allowed "
"%select{in function prototype"
"|in non-static struct member|in struct member"
"|in non-static union member|in union member"
"|in non-static class member|in interface member"
"|in exception declaration|in template parameter until C++17|in block literal"
"|in template argument|in typedef|in type alias|in function return type"
"|in conversion function type|here|in lambda parameter"
"|in type allocated by 'new'|in K&R-style function parameter"
"|in template parameter|in friend declaration|in function prototype that is "
"not a function declaration|in requires expression parameter}1">;
def err_dependent_deduced_tst : Error<
"typename specifier refers to "
"%select{class template|function template|variable template|alias template|"
"template template parameter|template}0 member in %1; "
"argument deduction not allowed here">;
def err_deduced_tst : Error<
"typename specifier refers to "
"%select{class template|function template|variable template|alias template|"
"template template parameter|template}0; argument deduction not allowed "
"here">;
def err_auto_not_allowed_var_inst : Error<
"'auto' variable template instantiation is not allowed">;
def err_auto_var_requires_init : Error<
"declaration of variable %0 with deduced type %1 requires an initializer">;
def err_auto_new_requires_ctor_arg : Error<
"new expression for type %0 requires a constructor argument">;
def ext_auto_new_list_init : Extension<
"ISO C++ standards before C++17 do not allow new expression for "
"type %0 to use list-initialization">, InGroup<CXX17>;
def err_auto_var_init_no_expression : Error<
"initializer for variable %0 with type %1 is empty">;
def err_auto_expr_init_no_expression : Error<
"initializer for functional-style cast to %0 is empty">;
def err_auto_var_init_multiple_expressions : Error<
"initializer for variable %0 with type %1 contains multiple expressions">;
def err_auto_expr_init_multiple_expressions : Error<
"initializer for functional-style cast to %0 contains multiple expressions">;
def err_auto_var_init_paren_braces : Error<
"cannot deduce type for variable %1 with type %2 from "
"%select{parenthesized|nested}0 initializer list">;
def err_auto_new_ctor_multiple_expressions : Error<
"new expression for type %0 contains multiple constructor arguments">;
def err_auto_expr_init_paren_braces : Error<
"cannot deduce actual type for %1 from "
"%select{parenthesized|nested}0 initializer list">;
def warn_cxx20_compat_auto_expr : Warning<
"'auto' as a functional-style cast is incompatible with C++ standards "
"before C++2b">, InGroup<CXXPre2bCompat>, DefaultIgnore;
def err_auto_missing_trailing_return : Error<
"'auto' return without trailing return type; deduced return types are a "
"C++14 extension">;
def err_deduced_return_type : Error<
"deduced return types are a C++14 extension">;
def err_trailing_return_without_auto : Error<
"function with trailing return type must specify return type 'auto', not %0">;
def err_trailing_return_in_parens : Error<
"trailing return type may not be nested within parentheses">;
def err_auto_var_deduction_failure : Error<
"variable %0 with type %1 has incompatible initializer of type %2">;
def err_auto_var_deduction_failure_from_init_list : Error<
"cannot deduce actual type for variable %0 with type %1 from initializer list">;
def err_auto_expr_deduction_failure : Error<
"functional-style cast to %0 has incompatible initializer of type %1">;
def err_auto_new_deduction_failure : Error<
"new expression for type %0 has incompatible constructor argument of type %1">;
def err_auto_inconsistent_deduction : Error<
"deduced conflicting types %diff{($ vs $) |}0,1"
"for initializer list element type">;
def err_auto_different_deductions : Error<
"%select{'auto'|'decltype(auto)'|'__auto_type'|template arguments}0 "
"deduced as %1 in declaration of %2 and "
"deduced as %3 in declaration of %4">;
def err_auto_non_deduced_not_alone : Error<
"%select{function with deduced return type|"
"declaration with trailing return type}0 "
"must be the only declaration in its group">;
def err_implied_std_initializer_list_not_found : Error<
"cannot deduce type of initializer list because std::initializer_list was "
"not found; include <initializer_list>">;
def err_malformed_std_initializer_list : Error<
"std::initializer_list must be a class template with a single type parameter">;
def err_auto_init_list_from_c : Error<
"cannot use __auto_type with initializer list in C">;
def err_auto_bitfield : Error<
"cannot pass bit-field as __auto_type initializer in C">;
// C++1y decltype(auto) type
def err_decltype_auto_invalid : Error<
"'decltype(auto)' not allowed here">;
def err_decltype_auto_cannot_be_combined : Error<
"'decltype(auto)' cannot be combined with other type specifiers">;
def err_decltype_auto_function_declarator_not_declaration : Error<
"'decltype(auto)' can only be used as a return type "
"in a function declaration">;
def err_decltype_auto_compound_type : Error<
"cannot form %select{pointer to|reference to|array of}0 'decltype(auto)'">;
def err_decltype_auto_initializer_list : Error<
"cannot deduce 'decltype(auto)' from initializer list">;
// C++17 deduced class template specialization types
def err_deduced_class_template_compound_type : Error<
"cannot %select{form pointer to|form reference to|form array of|"
"form function returning|use parentheses when declaring variable with}0 "
"deduced class template specialization type">;
def err_deduced_non_class_template_specialization_type : Error<
"%select{<error>|function template|variable template|alias template|"
"template template parameter|concept|template}0 %1 requires template "
"arguments; argument deduction only allowed for class templates">;
def err_deduced_class_template_ctor_ambiguous : Error<
"ambiguous deduction for template arguments of %0">;
def err_deduced_class_template_ctor_no_viable : Error<
"no viable constructor or deduction guide for deduction of "
"template arguments of %0">;
def err_deduced_class_template_incomplete : Error<
"template %0 has no definition and no %select{|viable }1deduction guides "
"for deduction of template arguments">;
def err_deduced_class_template_deleted : Error<
"class template argument deduction for %0 selected a deleted constructor">;
def err_deduced_class_template_explicit : Error<
"class template argument deduction for %0 selected an explicit "
"%select{constructor|deduction guide}1 for copy-list-initialization">;
def err_deduction_guide_no_trailing_return_type : Error<
"deduction guide declaration without trailing return type">;
def err_deduction_guide_bad_trailing_return_type : Error<
"deduced type %1 of deduction guide is not %select{|written as }2"
"a specialization of template %0">;
def err_deduction_guide_with_complex_decl : Error<
"cannot specify any part of a return type in the "
"declaration of a deduction guide">;
def err_deduction_guide_invalid_specifier : Error<
"deduction guide cannot be declared '%0'">;
def err_deduction_guide_name_not_class_template : Error<
"cannot specify deduction guide for "
"%select{<error>|function template|variable template|alias template|"
"template template parameter|concept|dependent template name}0 %1">;
def err_deduction_guide_wrong_scope : Error<
"deduction guide must be declared in the same scope as template %q0">;
def err_deduction_guide_defines_function : Error<
"deduction guide cannot have a function definition">;
def err_deduction_guide_redeclared : Error<
"redeclaration of deduction guide">;
def err_deduction_guide_specialized : Error<"deduction guide cannot be "
"%select{explicitly instantiated|explicitly specialized}0">;
def err_deduction_guide_template_not_deducible : Error<
"deduction guide template contains "
"%select{a template parameter|template parameters}0 that cannot be "
"deduced">;
def err_deduction_guide_wrong_access : Error<
"deduction guide has different access from the corresponding "
"member template">;
def note_deduction_guide_template_access : Note<
"member template declared %0 here">;
def note_deduction_guide_access : Note<
"deduction guide declared %0 by intervening access specifier">;
def warn_cxx14_compat_class_template_argument_deduction : Warning<
"class template argument deduction is incompatible with C++ standards "
"before C++17%select{|; for compatibility, use explicit type name %1}0">,
InGroup<CXXPre17Compat>, DefaultIgnore;
def warn_ctad_maybe_unsupported : Warning<
"%0 may not intend to support class template argument deduction">,
InGroup<CTADMaybeUnsupported>, DefaultIgnore;
def note_suppress_ctad_maybe_unsupported : Note<
"add a deduction guide to suppress this warning">;
// C++14 deduced return types
def err_auto_fn_deduction_failure : Error<
"cannot deduce return type %0 from returned value of type %1">;
def err_auto_fn_different_deductions : Error<
"'%select{auto|decltype(auto)}0' in return type deduced as %1 here but "
"deduced as %2 in earlier return statement">;
def err_auto_fn_used_before_defined : Error<
"function %0 with deduced return type cannot be used before it is defined">;
def err_auto_fn_no_return_but_not_auto : Error<
"cannot deduce return type %0 for function with no return statements">;
def err_auto_fn_return_void_but_not_auto : Error<
"cannot deduce return type %0 from omitted return expression">;
def err_auto_fn_return_init_list : Error<
"cannot deduce return type from initializer list">;
def err_auto_fn_virtual : Error<
"function with deduced return type cannot be virtual">;
def warn_cxx11_compat_deduced_return_type : Warning<
"return type deduction is incompatible with C++ standards before C++14">,
InGroup<CXXPre14Compat>, DefaultIgnore;
// C++11 override control
def override_keyword_only_allowed_on_virtual_member_functions : Error<
"only virtual member functions can be marked '%0'">;
def override_keyword_hides_virtual_member_function : Error<
"non-virtual member function marked '%0' hides virtual member "
"%select{function|functions}1">;
def err_function_marked_override_not_overriding : Error<
"%0 marked 'override' but does not override any member functions">;
def warn_destructor_marked_not_override_overriding : TextSubstitution <
"%0 overrides a destructor but is not marked 'override'">;
def warn_function_marked_not_override_overriding : TextSubstitution <
"%0 overrides a member function but is not marked 'override'">;
def warn_inconsistent_destructor_marked_not_override_overriding : Warning <
"%sub{warn_destructor_marked_not_override_overriding}0">,
InGroup<CXX11WarnInconsistentOverrideDestructor>, DefaultIgnore;
def warn_inconsistent_function_marked_not_override_overriding : Warning <
"%sub{warn_function_marked_not_override_overriding}0">,
InGroup<CXX11WarnInconsistentOverrideMethod>;
def warn_suggest_destructor_marked_not_override_overriding : Warning <
"%sub{warn_destructor_marked_not_override_overriding}0">,
InGroup<CXX11WarnSuggestOverrideDestructor>, DefaultIgnore;
def warn_suggest_function_marked_not_override_overriding : Warning <
"%sub{warn_function_marked_not_override_overriding}0">,
InGroup<CXX11WarnSuggestOverride>, DefaultIgnore;
def err_class_marked_final_used_as_base : Error<
"base %0 is marked '%select{final|sealed}1'">;
def warn_abstract_final_class : Warning<
"abstract class is marked '%select{final|sealed}0'">, InGroup<AbstractFinalClass>;
def warn_final_dtor_non_final_class : Warning<
"class with destructor marked '%select{final|sealed}0' cannot be inherited from">,
InGroup<FinalDtorNonFinalClass>;
def note_final_dtor_non_final_class_silence : Note<
"mark %0 as '%select{final|sealed}1' to silence this warning">;
// C++11 attributes
def err_repeat_attribute : Error<"%0 attribute cannot be repeated">;
// C++11 final
def err_final_function_overridden : Error<
"declaration of %0 overrides a '%select{final|sealed}1' function">;
// C++11 scoped enumerations
def err_enum_invalid_underlying : Error<
"non-integral type %0 is an invalid underlying type">;
def err_enumerator_too_large : Error<
"enumerator value is not representable in the underlying type %0">;
def ext_enumerator_too_large : Extension<
"enumerator value is not representable in the underlying type %0">,
InGroup<MicrosoftEnumValue>;
def err_enumerator_wrapped : Error<
"enumerator value %0 is not representable in the underlying type %1">;
def err_enum_redeclare_type_mismatch : Error<
"enumeration redeclared with different underlying type %0 (was %1)">;
def err_enum_redeclare_fixed_mismatch : Error<
"enumeration previously declared with %select{non|}0fixed underlying type">;
def err_enum_redeclare_scoped_mismatch : Error<
"enumeration previously declared as %select{un|}0scoped">;
def err_only_enums_have_underlying_types : Error<
"only enumeration types have underlying types">;
def err_underlying_type_of_incomplete_enum : Error<
"cannot determine underlying type of incomplete enumeration type %0">;
// C++11 delegating constructors
def err_delegating_ctor : Error<
"delegating constructors are permitted only in C++11">;
def warn_cxx98_compat_delegating_ctor : Warning<
"delegating constructors are incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_delegating_initializer_alone : Error<
"an initializer for a delegating constructor must appear alone">;
def warn_delegating_ctor_cycle : Warning<
"constructor for %0 creates a delegation cycle">, DefaultError,
InGroup<DelegatingCtorCycles>;
def note_it_delegates_to : Note<"it delegates to">;
def note_which_delegates_to : Note<"which delegates to">;
// C++11 range-based for loop
def err_for_range_decl_must_be_var : Error<
"for range declaration must declare a variable">;
def err_for_range_storage_class : Error<
"loop variable %0 may not be declared %select{'extern'|'static'|"
"'__private_extern__'|'auto'|'register'|'constexpr'|'thread_local'}1">;
def err_type_defined_in_for_range : Error<
"types may not be defined in a for range declaration">;
def err_for_range_deduction_failure : Error<
"cannot use type %0 as a range">;
def err_for_range_incomplete_type : Error<
"cannot use incomplete type %0 as a range">;
def err_for_range_iter_deduction_failure : Error<
"cannot use type %0 as an iterator">;
def ext_for_range_begin_end_types_differ : ExtWarn<
"'begin' and 'end' returning different types (%0 and %1) is a C++17 extension">,
InGroup<CXX17>;
def warn_for_range_begin_end_types_differ : Warning<
"'begin' and 'end' returning different types (%0 and %1) is incompatible "
"with C++ standards before C++17">, InGroup<CXXPre17Compat>, DefaultIgnore;
def note_in_for_range: Note<
"when looking up '%select{begin|end}0' function for range expression "
"of type %1">;
def err_for_range_invalid: Error<
"invalid range expression of type %0; no viable '%select{begin|end}1' "
"function available">;
def note_for_range_member_begin_end_ignored : Note<
"member is not a candidate because range type %0 has no '%select{end|begin}1' member">;
def err_range_on_array_parameter : Error<
"cannot build range expression with array function parameter %0 since "
"parameter with array type %1 is treated as pointer type %2">;
def err_for_range_dereference : Error<
"invalid range expression of type %0; did you mean to dereference it "
"with '*'?">;
def note_for_range_invalid_iterator : Note <
"in implicit call to 'operator%select{!=|*|++}0' for iterator of type %1">;
def note_for_range_begin_end : Note<
"selected '%select{begin|end}0' %select{function|template }1%2 with iterator type %3">;
def warn_for_range_const_ref_binds_temp_built_from_ref : Warning<
"loop variable %0 "
"%diff{of type $ binds to a temporary constructed from type $"
"|binds to a temporary constructed from a different type}1,2">,
InGroup<RangeLoopConstruct>, DefaultIgnore;
def note_use_type_or_non_reference : Note<
"use non-reference type %0 to make construction explicit or type %1 to prevent copying">;
def warn_for_range_ref_binds_ret_temp : Warning<
"loop variable %0 binds to a temporary value produced by a range of type %1">,
InGroup<RangeLoopBindReference>, DefaultIgnore;
def note_use_non_reference_type : Note<"use non-reference type %0">;
def warn_for_range_copy : Warning<
"loop variable %0 creates a copy from type %1">,
InGroup<RangeLoopConstruct>, DefaultIgnore;
def note_use_reference_type : Note<"use reference type %0 to prevent copying">;
def err_objc_for_range_init_stmt : Error<
"initialization statement is not supported when iterating over Objective-C "
"collection">;
// C++11 constexpr
def warn_cxx98_compat_constexpr : Warning<
"'constexpr' specifier is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
// FIXME: Maybe this should also go in -Wc++14-compat?
def warn_cxx14_compat_constexpr_not_const : Warning<
"'constexpr' non-static member function will not be implicitly 'const' "
"in C++14; add 'const' to avoid a change in behavior">,
InGroup<DiagGroup<"constexpr-not-const">>;
def err_invalid_consteval_take_address : Error<
"cannot take address of consteval function %0 outside"
" of an immediate invocation">;
def err_invalid_consteval_call : Error<
"call to consteval function %q0 is not a constant expression">;
def note_invalid_consteval_initializer : Note<
"in the default initalizer of %0">;
def note_invalid_consteval_initializer_here : Note<
"initialized here %0">;
def err_invalid_consteval_decl_kind : Error<
"%0 cannot be declared consteval">;
def err_invalid_constexpr : Error<
"%select{function parameter|typedef}0 "
"cannot be %sub{select_constexpr_spec_kind}1">;
def err_invalid_constexpr_member : Error<"non-static data member cannot be "
"constexpr%select{; did you intend to make it %select{const|static}0?|}1">;
def err_constexpr_tag : Error<
"%select{class|struct|interface|union|enum}0 "
"cannot be marked %sub{select_constexpr_spec_kind}1">;
def err_constexpr_dtor : Error<
"destructor cannot be declared %sub{select_constexpr_spec_kind}0">;
def err_constexpr_dtor_subobject : Error<
"destructor cannot be declared %sub{select_constexpr_spec_kind}0 because "
"%select{data member %2|base class %3}1 does not have a "
"constexpr destructor">;
def note_constexpr_dtor_subobject : Note<
"%select{data member %1|base class %2}0 declared here">;
def err_constexpr_wrong_decl_kind : Error<
"%sub{select_constexpr_spec_kind}0 can only be used "
"in %select{|variable and function|function|variable}0 declarations">;
def err_invalid_constexpr_var_decl : Error<
"constexpr variable declaration must be a definition">;
def err_constexpr_static_mem_var_requires_init : Error<
"declaration of constexpr static data member %0 requires an initializer">;
def err_constexpr_var_non_literal : Error<
"constexpr variable cannot have non-literal type %0">;
def err_constexpr_var_requires_const_init : Error<
"constexpr variable %0 must be initialized by a constant expression">;
def err_constexpr_var_requires_const_destruction : Error<
"constexpr variable %0 must have constant destruction">;
def err_constexpr_redecl_mismatch : Error<
"%select{non-constexpr|constexpr|consteval}1 declaration of %0"
" follows %select{non-constexpr|constexpr|consteval}2 declaration">;
def err_constexpr_virtual : Error<"virtual function cannot be constexpr">;
def warn_cxx17_compat_constexpr_virtual : Warning<
"virtual constexpr functions are incompatible with "
"C++ standards before C++20">, InGroup<CXXPre20Compat>, DefaultIgnore;
def err_constexpr_virtual_base : Error<
"constexpr %select{member function|constructor}0 not allowed in "
"%select{struct|interface|class}1 with virtual base "
"%plural{1:class|:classes}2">;
def note_non_literal_incomplete : Note<
"incomplete type %0 is not a literal type">;
def note_non_literal_virtual_base : Note<"%select{struct|interface|class}0 "
"with virtual base %plural{1:class|:classes}1 is not a literal type">;
def note_constexpr_virtual_base_here : Note<"virtual base class declared here">;
def err_constexpr_non_literal_return : Error<
"%select{constexpr|consteval}0 function's return type %1 is not a literal type">;
def err_constexpr_non_literal_param : Error<
"%select{constexpr|consteval}2 %select{function|constructor}1's %ordinal0 parameter type %3 is "
"not a literal type">;
def err_constexpr_body_invalid_stmt : Error<
"statement not allowed in %select{constexpr|consteval}1 %select{function|constructor}0">;
def ext_constexpr_body_invalid_stmt : ExtWarn<
"use of this statement in a constexpr %select{function|constructor}0 "
"is a C++14 extension">, InGroup<CXX14>;
def warn_cxx11_compat_constexpr_body_invalid_stmt : Warning<
"use of this statement in a constexpr %select{function|constructor}0 "
"is incompatible with C++ standards before C++14">,
InGroup<CXXPre14Compat>, DefaultIgnore;
def ext_constexpr_body_invalid_stmt_cxx20 : ExtWarn<
"use of this statement in a constexpr %select{function|constructor}0 "
"is a C++20 extension">, InGroup<CXX20>;
def warn_cxx17_compat_constexpr_body_invalid_stmt : Warning<
"use of this statement in a constexpr %select{function|constructor}0 "
"is incompatible with C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_constexpr_body_invalid_stmt_cxx2b : ExtWarn<
"use of this statement in a constexpr %select{function|constructor}0 "
"is a C++2b extension">, InGroup<CXX2b>;
def warn_cxx20_compat_constexpr_body_invalid_stmt : Warning<
"use of this statement in a constexpr %select{function|constructor}0 "
"is incompatible with C++ standards before C++2b">,
InGroup<CXXPre2bCompat>, DefaultIgnore;
def ext_constexpr_type_definition : ExtWarn<
"type definition in a constexpr %select{function|constructor}0 "
"is a C++14 extension">, InGroup<CXX14>;
def warn_cxx11_compat_constexpr_type_definition : Warning<
"type definition in a constexpr %select{function|constructor}0 "
"is incompatible with C++ standards before C++14">,
InGroup<CXXPre14Compat>, DefaultIgnore;
def err_constexpr_vla : Error<
"variably-modified type %0 cannot be used in a constexpr "
"%select{function|constructor}1">;
def ext_constexpr_local_var : ExtWarn<
"variable declaration in a constexpr %select{function|constructor}0 "
"is a C++14 extension">, InGroup<CXX14>;
def warn_cxx11_compat_constexpr_local_var : Warning<
"variable declaration in a constexpr %select{function|constructor}0 "
"is incompatible with C++ standards before C++14">,
InGroup<CXXPre14Compat>, DefaultIgnore;
def ext_constexpr_static_var : ExtWarn<
"definition of a %select{static|thread_local}1 variable "
"in a constexpr %select{function|constructor}0 "
"is a C++2b extension">, InGroup<CXX2b>;
def warn_cxx20_compat_constexpr_var : Warning<
"definition of a %select{static variable|thread_local variable|variable "
"of non-literal type}1 in a constexpr %select{function|constructor}0 "
"is incompatible with C++ standards before C++2b">,
InGroup<CXXPre2bCompat>, DefaultIgnore;
def err_constexpr_local_var_non_literal_type : Error<
"variable of non-literal type %1 cannot be defined in a constexpr "
"%select{function|constructor}0 before C++2b">;
def ext_constexpr_local_var_no_init : ExtWarn<
"uninitialized variable in a constexpr %select{function|constructor}0 "
"is a C++20 extension">, InGroup<CXX20>;
def warn_cxx17_compat_constexpr_local_var_no_init : Warning<
"uninitialized variable in a constexpr %select{function|constructor}0 "
"is incompatible with C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_constexpr_function_never_constant_expr : ExtWarn<
"%select{constexpr|consteval}1 %select{function|constructor}0 never produces a "
"constant expression">, InGroup<DiagGroup<"invalid-constexpr">>, DefaultError;
def err_attr_cond_never_constant_expr : Error<
"%0 attribute expression never produces a constant expression">;
def err_diagnose_if_invalid_diagnostic_type : Error<
"invalid diagnostic type for 'diagnose_if'; use \"error\" or \"warning\" "
"instead">;
def err_constexpr_body_no_return : Error<
"no return statement in %select{constexpr|consteval}0 function">;
def err_constexpr_return_missing_expr : Error<
"non-void %select{constexpr|consteval}1 function %0 should return a value">;
def warn_cxx11_compat_constexpr_body_no_return : Warning<
"constexpr function with no return statements is incompatible with C++ "
"standards before C++14">, InGroup<CXXPre14Compat>, DefaultIgnore;
def ext_constexpr_body_multiple_return : ExtWarn<
"multiple return statements in constexpr function is a C++14 extension">,
InGroup<CXX14>;
def warn_cxx11_compat_constexpr_body_multiple_return : Warning<
"multiple return statements in constexpr function "
"is incompatible with C++ standards before C++14">,
InGroup<CXXPre14Compat>, DefaultIgnore;
def note_constexpr_body_previous_return : Note<
"previous return statement is here">;
// C++20 function try blocks in constexpr
def ext_constexpr_function_try_block_cxx20 : ExtWarn<
"function try block in constexpr %select{function|constructor}0 is "
"a C++20 extension">, InGroup<CXX20>;
def warn_cxx17_compat_constexpr_function_try_block : Warning<
"function try block in constexpr %select{function|constructor}0 is "
"incompatible with C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_constexpr_union_ctor_no_init : ExtWarn<
"constexpr union constructor that does not initialize any member "
"is a C++20 extension">, InGroup<CXX20>;
def warn_cxx17_compat_constexpr_union_ctor_no_init : Warning<
"constexpr union constructor that does not initialize any member "
"is incompatible with C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_constexpr_ctor_missing_init : ExtWarn<
"constexpr constructor that does not initialize all members "
"is a C++20 extension">, InGroup<CXX20>;
def warn_cxx17_compat_constexpr_ctor_missing_init : Warning<
"constexpr constructor that does not initialize all members "
"is incompatible with C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
def note_constexpr_ctor_missing_init : Note<
"member not initialized by constructor">;
def note_non_literal_no_constexpr_ctors : Note<
"%0 is not literal because it is not an aggregate and has no constexpr "
"constructors other than copy or move constructors">;
def note_non_literal_base_class : Note<
"%0 is not literal because it has base class %1 of non-literal type">;
def note_non_literal_field : Note<
"%0 is not literal because it has data member %1 of "
"%select{non-literal|volatile}3 type %2">;
def note_non_literal_user_provided_dtor : Note<
"%0 is not literal because it has a user-provided destructor">;
def note_non_literal_nontrivial_dtor : Note<
"%0 is not literal because it has a non-trivial destructor">;
def note_non_literal_non_constexpr_dtor : Note<
"%0 is not literal because its destructor is not constexpr">;
def note_non_literal_lambda : Note<
"lambda closure types are non-literal types before C++17">;
def warn_private_extern : Warning<
"use of __private_extern__ on a declaration may not produce external symbol "
"private to the linkage unit and is deprecated">, InGroup<PrivateExtern>;
def note_private_extern : Note<
"use __attribute__((visibility(\"hidden\"))) attribute instead">;
// C++ Concepts
def err_concept_decls_may_only_appear_in_global_namespace_scope : Error<
"concept declarations may only appear in global or namespace scope">;
def err_concept_no_parameters : Error<
"concept template parameter list must have at least one parameter; explicit "
"specialization of concepts is not allowed">;
def err_concept_extra_headers : Error<
"extraneous template parameter list in concept definition">;
def err_concept_no_associated_constraints : Error<
"concept cannot have associated constraints">;
def err_non_constant_constraint_expression : Error<
"substitution into constraint expression resulted in a non-constant "
"expression">;
def err_non_bool_atomic_constraint : Error<
"atomic constraint must be of type 'bool' (found %0)">;
def err_template_arg_list_constraints_not_satisfied : Error<
"constraints not satisfied for %select{class template|function template|variable template|alias template|"
"template template parameter|template}0 %1%2">;
def note_substituted_constraint_expr_is_ill_formed : Note<
"because substituted constraint expression is ill-formed%0">;
def note_constraint_references_error
: Note<"constraint depends on a previously diagnosed expression">;
def note_atomic_constraint_evaluated_to_false : Note<
"%select{and|because}0 '%1' evaluated to false">;
def note_concept_specialization_constraint_evaluated_to_false : Note<
"%select{and|because}0 '%1' evaluated to false">;
def note_single_arg_concept_specialization_constraint_evaluated_to_false : Note<
"%select{and|because}0 %1 does not satisfy %2">;
def note_atomic_constraint_evaluated_to_false_elaborated : Note<
"%select{and|because}0 '%1' (%2 %3 %4) evaluated to false">;
def err_constrained_virtual_method : Error<
"virtual function cannot have a requires clause">;
def err_trailing_requires_clause_on_deduction_guide : Error<
"deduction guide cannot have a requires clause">;
def err_constrained_non_templated_function
: Error<"non-templated function cannot have a requires clause">;
def err_reference_to_function_with_unsatisfied_constraints : Error<
"invalid reference to function %0: constraints not satisfied">;
def err_requires_expr_local_parameter_default_argument : Error<
"default arguments not allowed for parameters of a requires expression">;
def err_requires_expr_parameter_referenced_in_evaluated_context : Error<
"constraint variable %0 cannot be used in an evaluated context">;
def note_expr_requirement_expr_substitution_error : Note<
"%select{and|because}0 '%1' would be invalid: %2">;
def note_expr_requirement_expr_unknown_substitution_error : Note<
"%select{and|because}0 '%1' would be invalid">;
def note_expr_requirement_noexcept_not_met : Note<
"%select{and|because}0 '%1' may throw an exception">;
def note_expr_requirement_type_requirement_substitution_error : Note<
"%select{and|because}0 '%1' would be invalid: %2">;
def note_expr_requirement_type_requirement_unknown_substitution_error : Note<
"%select{and|because}0 '%1' would be invalid">;
def note_expr_requirement_constraints_not_satisfied : Note<
"%select{and|because}0 type constraint '%1' was not satisfied:">;
def note_expr_requirement_constraints_not_satisfied_simple : Note<
"%select{and|because}0 %1 does not satisfy %2:">;
def note_type_requirement_substitution_error : Note<
"%select{and|because}0 '%1' would be invalid: %2">;
def note_type_requirement_unknown_substitution_error : Note<
"%select{and|because}0 '%1' would be invalid">;
def note_nested_requirement_substitution_error : Note<
"%select{and|because}0 '%1' would be invalid%2">;
def note_nested_requirement_unknown_substitution_error : Note<
"%select{and|because}0 '%1' would be invalid">;
def note_ambiguous_atomic_constraints : Note<
"similar constraint expressions not considered equivalent; constraint "
"expressions cannot be considered equivalent unless they originate from the "
"same concept">;
def note_ambiguous_atomic_constraints_similar_expression : Note<
"similar constraint expression here">;
def err_unsupported_placeholder_constraint : Error<
"constrained placeholder types other than simple 'auto' on non-type template "
"parameters not supported yet">;
def err_template_different_requires_clause : Error<
"requires clause differs in template redeclaration">;
def err_template_different_type_constraint : Error<
"type constraint differs in template redeclaration">;
def err_template_template_parameter_not_at_least_as_constrained : Error<
"template template argument %0 is more constrained than template template "
"parameter %1">;
def err_type_constraint_non_type_concept : Error<
"concept named in type constraint is not a type concept">;
def err_type_constraint_missing_arguments : Error<
"%0 requires more than 1 template argument; provide the remaining arguments "
"explicitly to use it here">;
def err_placeholder_constraints_not_satisfied : Error<
"deduced type %0 does not satisfy %1">;
// C++11 char16_t/char32_t
def warn_cxx98_compat_unicode_type : Warning<
"'%0' type specifier is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def warn_cxx17_compat_unicode_type : Warning<
"'char8_t' type specifier is incompatible with C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
// __make_integer_seq
def err_integer_sequence_negative_length : Error<
"integer sequences must have non-negative sequence length">;
def err_integer_sequence_integral_element_type : Error<
"integer sequences must have integral element type">;
// __type_pack_element
def err_type_pack_element_out_of_bounds : Error<
"a parameter pack may not be accessed at an out of bounds index">;
// Objective-C++
def err_objc_decls_may_only_appear_in_global_scope : Error<
"Objective-C declarations may only appear in global scope">;
def warn_auto_var_is_id : Warning<
"'auto' deduced as 'id' in declaration of %0">,
InGroup<DiagGroup<"auto-var-id">>;
// Attributes
def warn_attribute_ignored_no_calls_in_stmt: Warning<
"%0 attribute is ignored because there exists no call expression inside the "
"statement">,
InGroup<IgnoredAttributes>;
def warn_function_attribute_ignored_in_stmt : Warning<
"attribute is ignored on this statement as it only applies to functions; "
"use '%0' on statements">,
InGroup<IgnoredAttributes>;
def err_musttail_needs_trivial_args : Error<
"tail call requires that the return value, all parameters, and any "
"temporaries created by the expression are trivially destructible">;
def err_musttail_needs_call : Error<
"%0 attribute requires that the return value is the result of a function call"
>;
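// Shape of the statement these musttail diagnostics check (an illustrative
// sketch; 'callee'/'caller' are invented names): the attribute must sit on a
// return whose operand is a direct call with a compatible signature.
//   int callee(int);
//   int caller(int x) {
//     [[clang::musttail]] return callee(x);  // OK
//   }
//   // [[clang::musttail]] return x + 1;     // not the result of a call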
def err_musttail_needs_prototype : Error<
"%0 attribute requires that both caller and callee functions have a "
"prototype">;
def note_musttail_fix_non_prototype : Note<
"add 'void' to the parameter list to turn an old-style K&R function "
"declaration into a prototype">;
def err_musttail_structors_forbidden : Error<"cannot perform a tail call "
"%select{from|to}0 a %select{constructor|destructor}1">;
def note_musttail_structors_forbidden : Note<"target "
"%select{constructor|destructor}0 is declared here">;
def err_musttail_forbidden_from_this_context : Error<
"%0 attribute cannot be used from "
"%select{a block|an Objective-C function|this context}1">;
def err_musttail_member_mismatch : Error<
"%select{non-member|static member|non-static member}0 "
"function cannot perform a tail call to "
"%select{non-member|static member|non-static member|pointer-to-member}1 "
"function%select{| %3}2">;
def note_musttail_callee_defined_here : Note<"%0 declared here">;
def note_tail_call_required : Note<"tail call required by %0 attribute here">;
def err_musttail_mismatch : Error<
"cannot perform a tail call to function%select{| %1}0 because its signature "
"is incompatible with the calling function">;
def note_musttail_mismatch : Note<
"target function "
"%select{is a member of different class%diff{ (expected $ but has $)|}1,2"
"|has different number of parameters (expected %1 but has %2)"
"|has type mismatch at %ordinal3 parameter"
"%diff{ (expected $ but has $)|}1,2"
"|has different return type%diff{ ($ expected but has $)|}1,2}0">;
def err_musttail_callconv_mismatch : Error<
"cannot perform a tail call to function%select{| %1}0 because it uses an "
"incompatible calling convention">;
def note_musttail_callconv_mismatch : Note<
"target function has calling convention %1 (expected %0)">;
def err_musttail_scope : Error<
"cannot perform a tail call from this return statement">;
def err_musttail_no_variadic : Error<
"%0 attribute may not be used with variadic functions">;
def err_nsobject_attribute : Error<
"'NSObject' attribute is for pointer types only">;
def err_attributes_are_not_compatible : Error<
"%0 and %1 attributes are not compatible">;
def err_attribute_invalid_argument : Error<
"%select{a reference type|an array type|a non-vector or "
"non-vectorizable scalar type}0 is an invalid argument to attribute %1">;
def err_attribute_wrong_number_arguments : Error<
"%0 attribute %plural{0:takes no arguments|1:takes one argument|"
":requires exactly %1 arguments}1">;
def err_attribute_wrong_number_arguments_for : Error <
"%0 attribute references function %1, which %plural{0:takes no arguments|1:takes one argument|"
":takes exactly %2 arguments}2">;
def err_attribute_bounds_for_function : Error<
"%0 attribute references parameter %1, but the function %2 has only %3 parameters">;
def err_attribute_no_member_function : Error<
"%0 attribute cannot be applied to non-static member functions">;
def err_attribute_parameter_types : Error<
"%0 attribute parameter types do not match: parameter %1 of function %2 has type %3, "
"but parameter %4 of function %5 has type %6">;
def err_attribute_too_many_arguments : Error<
"%0 attribute takes no more than %1 argument%s1">;
def err_attribute_too_few_arguments : Error<
"%0 attribute takes at least %1 argument%s1">;
def err_attribute_invalid_vector_type : Error<"invalid vector element type %0">;
def err_attribute_invalid_bitint_vector_type : Error<
"'_BitInt' vector element width must be %select{a power of 2|"
"at least as wide as 'CHAR_BIT'}0">;
def err_attribute_invalid_matrix_type : Error<"invalid matrix element type %0">;
def err_attribute_bad_neon_vector_size : Error<
"Neon vector size must be 64 or 128 bits">;
def err_attribute_invalid_sve_type : Error<
"%0 attribute applied to non-SVE type %1">;
def err_attribute_bad_sve_vector_size : Error<
"invalid SVE vector size '%0', must match value set by "
"'-msve-vector-bits' ('%1')">;
def err_attribute_arm_feature_sve_bits_unsupported : Error<
"%0 is only supported when '-msve-vector-bits=<bits>' is specified with a "
"value of 128, 256, 512, 1024 or 2048.">;
def err_sve_vector_in_non_sve_target : Error<
"SVE vector type %0 cannot be used in a target without sve">;
def err_attribute_requires_positive_integer : Error<
"%0 attribute requires a %select{positive|non-negative}1 "
"integral compile time constant expression">;
def err_attribute_requires_opencl_version : Error<
"attribute %0 is supported in the OpenCL version %1%select{| onwards}2">;
def err_invalid_branch_protection_spec : Error<
"invalid or misplaced branch protection specification '%0'">;
def warn_unsupported_branch_protection_spec : Warning<
"unsupported branch protection specification '%0'">, InGroup<BranchProtection>;
def warn_unsupported_target_attribute
: Warning<"%select{unsupported|duplicate|unknown}0%select{| CPU|"
" tune CPU}1 '%2' in the '%select{target|target_clones|target_version}3' "
"attribute string; '%select{target|target_clones|target_version}3' "
"attribute ignored">,
InGroup<IgnoredAttributes>;
def err_attribute_unsupported
: Error<"%0 attribute is not supported on targets missing %1;"
" specify an appropriate -march= or -mcpu=">;
// The err_*_attribute_argument_not_int are separate because they're used by
// VerifyIntegerConstantExpression.
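// Illustrative only (hypothetical snippet, not from the test suite): for
// 'int x __attribute__((aligned(n)));' where 'n' is not an integer constant
// expression, the argument is checked via VerifyIntegerConstantExpression and
// reported with the dedicated err_aligned_attribute_argument_not_int rather
// than a generic argument-type diagnostic.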
def err_aligned_attribute_argument_not_int : Error<
"'aligned' attribute requires integer constant">;
def err_align_value_attribute_argument_not_int : Error<
"'align_value' attribute requires integer constant">;
def err_alignas_attribute_wrong_decl_type : Error<
"%0 attribute cannot be applied to %select{a function parameter|"
"a variable with 'register' storage class|a 'catch' variable|a bit-field|"
"an enumeration}1">;
def err_alignas_missing_on_definition : Error<
"%0 must be specified on definition if it is specified on any declaration">;
def note_alignas_on_declaration : Note<"declared with %0 attribute here">;
def err_alignas_mismatch : Error<
"redeclaration has different alignment requirement (%1 vs %0)">;
def err_alignas_underaligned : Error<
"requested alignment is less than minimum alignment of %1 for type %0">;
def warn_aligned_attr_underaligned : Warning<err_alignas_underaligned.Summary>,
InGroup<IgnoredAttributes>;
def err_attribute_sizeless_type : Error<
"%0 attribute cannot be applied to sizeless type %1">;
def err_attribute_argument_n_type : Error<
"%0 attribute requires parameter %1 to be %select{int or bool|an integer "
"constant|a string|an identifier|a constant expression|a builtin function}2">;
def err_attribute_argument_type : Error<
"%0 attribute requires %select{int or bool|an integer "
"constant|a string|an identifier}1">;
def err_attribute_argument_out_of_range : Error<
"%0 attribute requires integer constant between %1 and %2 inclusive">;
def err_init_priority_object_attr : Error<
"can only use 'init_priority' attribute on file-scope definitions "
"of objects of class type">;
def err_attribute_argument_out_of_bounds : Error<
"%0 attribute parameter %1 is out of bounds">;
def err_attribute_only_once_per_parameter : Error<
"%0 attribute can only be applied once per parameter">;
def err_mismatched_uuid : Error<"uuid does not match previous declaration">;
def note_previous_uuid : Note<"previous uuid specified here">;
def warn_attribute_pointers_only : Warning<
"%0 attribute only applies to%select{| constant}1 pointer arguments">,
InGroup<IgnoredAttributes>;
def err_attribute_pointers_only : Error<warn_attribute_pointers_only.Summary>;
def err_attribute_integers_only : Error<
"%0 attribute argument may only refer to a function parameter of integer "
"type">;
def warn_attribute_return_pointers_only : Warning<
"%0 attribute only applies to return values that are pointers">,
InGroup<IgnoredAttributes>;
def warn_attribute_return_pointers_refs_only : Warning<
"%0 attribute only applies to return values that are pointers or references">,
InGroup<IgnoredAttributes>;
def warn_attribute_pointer_or_reference_only : Warning<
"%0 attribute only applies to a pointer or reference (%1 is invalid)">,
InGroup<IgnoredAttributes>;
def err_attribute_no_member_pointers : Error<
"%0 attribute cannot be used with pointers to members">;
def err_attribute_invalid_implicit_this_argument : Error<
"%0 attribute is invalid for the implicit this argument">;
def err_ownership_type : Error<
"%0 attribute only applies to %select{pointer|integer}1 arguments">;
def err_ownership_returns_index_mismatch : Error<
"'ownership_returns' attribute index does not match; here it is %0">;
def note_ownership_returns_index_mismatch : Note<
"declared with index %0 here">;
def err_format_strftime_third_parameter : Error<
"strftime format attribute requires 3rd parameter to be 0">;
def err_format_attribute_not : Error<"format argument not a string type">;
def err_format_attribute_result_not : Error<"function does not return %0">;
def err_format_attribute_implicit_this_format_string : Error<
"format attribute cannot specify the implicit this argument as the format "
"string">;
def err_callback_attribute_no_callee : Error<
"'callback' attribute specifies no callback callee">;
def err_callback_attribute_invalid_callee : Error<
"'callback' attribute specifies invalid callback callee">;
def err_callback_attribute_multiple : Error<
"multiple 'callback' attributes specified">;
def err_callback_attribute_argument_unknown : Error<
"'callback' attribute argument %0 is not a known function parameter">;
def err_callback_callee_no_function_type : Error<
"'callback' attribute callee does not have function type">;
def err_callback_callee_is_variadic : Error<
"'callback' attribute callee may not be variadic">;
def err_callback_implicit_this_not_available : Error<
"'callback' argument at position %0 references unavailable implicit 'this'">;
def err_init_method_bad_return_type : Error<
"init methods must return an object pointer type, not %0">;
def err_attribute_invalid_size : Error<
"vector size not an integral multiple of component size">;
def err_attribute_zero_size : Error<"zero %0 size">;
def err_attribute_size_too_large : Error<"%0 size too large">;
def err_typecheck_sve_ambiguous : Error<
"cannot combine fixed-length and sizeless SVE vectors in expression, result is ambiguous (%0 and %1)">;
def err_typecheck_sve_gnu_ambiguous : Error<
"cannot combine GNU and SVE vectors in expression, result is ambiguous (%0 and %1)">;
def err_typecheck_vector_not_convertable_implict_truncation : Error<
"cannot convert between %select{scalar|vector}0 type %1 and vector type"
" %2 as implicit conversion would cause truncation">;
def err_typecheck_vector_not_convertable : Error<
"cannot convert between vector values of different size (%0 and %1)">;
def err_typecheck_vector_not_convertable_non_scalar : Error<
"cannot convert between vector and non-scalar values (%0 and %1)">;
def err_typecheck_vector_lengths_not_equal : Error<
"vector operands do not have the same number of elements (%0 and %1)">;
def warn_typecheck_vector_element_sizes_not_equal : Warning<
"vector operands do not have the same elements sizes (%0 and %1)">,
InGroup<DiagGroup<"vec-elem-size">>, DefaultError;
def err_ext_vector_component_exceeds_length : Error<
"vector component access exceeds type %0">;
def err_ext_vector_component_name_illegal : Error<
"illegal vector component name '%0'">;
def err_attribute_address_space_negative : Error<
"address space is negative">;
def err_attribute_address_space_too_high : Error<
"address space is larger than the maximum supported (%0)">;
def err_attribute_address_multiple_qualifiers : Error<
"multiple address spaces specified for type">;
def warn_attribute_address_multiple_identical_qualifiers : Warning<
"multiple identical address spaces specified for type">,
InGroup<DuplicateDeclSpecifier>;
def err_attribute_not_clinkage : Error<
"function type with %0 attribute must have C linkage">;
def err_function_decl_cmse_ns_call : Error<
"functions may not be declared with 'cmse_nonsecure_call' attribute">;
def err_attribute_address_function_type : Error<
"function type may not be qualified with an address space">;
def err_as_qualified_auto_decl : Error<
"automatic variable qualified with an%select{| invalid}0 address space">;
def err_arg_with_address_space : Error<
"parameter may not be qualified with an address space">;
def err_field_with_address_space : Error<
"field may not be qualified with an address space">;
def err_compound_literal_with_address_space : Error<
"compound literal in function scope may not be qualified with an address space">;
def err_address_space_mismatch_templ_inst : Error<
"conflicting address space qualifiers are provided between types %0 and %1">;
def err_attr_objc_ownership_redundant : Error<
"the type %0 is already explicitly ownership-qualified">;
def err_invalid_nsnumber_type : Error<
"%0 is not a valid literal type for NSNumber">;
def err_objc_illegal_boxed_expression_type : Error<
"illegal type %0 used in a boxed expression">;
def err_objc_non_trivially_copyable_boxed_expression_type : Error<
"non-trivially copyable type %0 cannot be used in a boxed expression">;
def err_objc_incomplete_boxed_expression_type : Error<
"incomplete type %0 used in a boxed expression">;
def err_undeclared_objc_literal_class : Error<
"definition of class %0 must be available to use Objective-C "
"%select{array literals|dictionary literals|numeric literals|boxed expressions|"
"string literals}1">;
def err_undeclared_boxing_method : Error<
"declaration of %0 is missing in %1 class">;
def err_objc_literal_method_sig : Error<
"literal construction method %0 has incompatible signature">;
def note_objc_literal_method_param : Note<
"%select{first|second|third}0 parameter has unexpected type %1 "
"(should be %2)">;
def note_objc_literal_method_return : Note<
"method returns unexpected type %0 (should be an object type)">;
def err_invalid_collection_element : Error<
"collection element of type %0 is not an Objective-C object">;
def err_box_literal_collection : Error<
"%select{string|character|boolean|numeric}0 literal must be prefixed by '@' "
"in a collection">;
def warn_objc_literal_comparison : Warning<
"direct comparison of %select{an array literal|a dictionary literal|"
"a numeric literal|a boxed expression|}0 has undefined behavior">,
InGroup<ObjCLiteralComparison>;
def err_missing_atsign_prefix : Error<
"%select{string|numeric}0 literal must be prefixed by '@'">;
def warn_objc_string_literal_comparison : Warning<
"direct comparison of a string literal has undefined behavior">,
InGroup<ObjCStringComparison>;
def warn_concatenated_literal_array_init : Warning<
"suspicious concatenation of string literals in an array initialization; "
"did you mean to separate the elements with a comma?">,
InGroup<StringConcatation>, DefaultIgnore;
def warn_concatenated_nsarray_literal : Warning<
"concatenated NSString literal for an NSArray expression - "
"possibly missing a comma">,
InGroup<ObjCStringConcatenation>;
def note_objc_literal_comparison_isequal : Note<
"use 'isEqual:' instead">;
def warn_objc_collection_literal_element : Warning<
"object of type %0 is not compatible with "
"%select{array element type|dictionary key type|dictionary value type}1 %2">,
InGroup<ObjCLiteralConversion>;
def warn_nsdictionary_duplicate_key : Warning<
"duplicate key in dictionary literal">,
InGroup<DiagGroup<"objc-dictionary-duplicate-keys">>;
def note_nsdictionary_duplicate_key_here : Note<
"previous equal key is here">;
def err_swift_param_attr_not_swiftcall : Error<
"'%0' parameter can only be used with swiftcall%select{ or swiftasynccall|}1 "
"calling convention%select{|s}1">;
def err_swift_indirect_result_not_first : Error<
"'swift_indirect_result' parameters must be first parameters of function">;
def err_swift_error_result_not_after_swift_context : Error<
"'swift_error_result' parameter must follow 'swift_context' parameter">;
def err_swift_abi_parameter_wrong_type : Error<
"'%0' parameter must have pointer%select{| to unqualified pointer}1 type; "
"type here is %2">;
def err_attribute_argument_invalid : Error<
"%0 attribute argument is invalid: %select{max must be 0 since min is 0|"
"min must not be greater than max}1">;
def err_attribute_argument_is_zero : Error<
"%0 attribute must be greater than 0">;
def warn_attribute_argument_n_negative : Warning<
"%0 attribute parameter %1 is negative and will be ignored">,
InGroup<CudaCompat>;
def err_property_function_in_objc_container : Error<
"use of Objective-C property in function nested in Objective-C "
"container not supported, move function outside its container">;
let CategoryName = "Cocoa API Issue" in {
def warn_objc_redundant_literal_use : Warning<
"using %0 with a literal is redundant">, InGroup<ObjCRedundantLiteralUse>;
}
def err_attr_tlsmodel_arg : Error<"tls_model must be \"global-dynamic\", "
"\"local-dynamic\", \"initial-exec\" or \"local-exec\"">;
def err_aix_attr_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">;
def err_tls_var_aligned_over_maximum : Error<
"alignment (%0) of thread-local variable %1 is greater than the maximum supported "
"alignment (%2) for a thread-local variable on this target">;
def err_only_annotate_after_access_spec : Error<
"access specifier can only have annotation attributes">;
def err_attribute_section_invalid_for_target : Error<
"argument to %select{'code_seg'|'section'}1 attribute is not valid for this target: %0">;
def err_pragma_section_invalid_for_target : Error<
"argument to #pragma section is not valid for this target: %0">;
def warn_attribute_section_drectve : Warning<
"#pragma %0(\".drectve\") has undefined behavior, "
"use #pragma comment(linker, ...) instead">, InGroup<MicrosoftDrectveSection>;
def warn_mismatched_section : Warning<
"%select{codeseg|section}0 does not match previous declaration">, InGroup<Section>;
def warn_attribute_section_on_redeclaration : Warning<
"section attribute is specified on redeclared variable">, InGroup<Section>;
def err_mismatched_code_seg_base : Error<
"derived class must specify the same code segment as its base classes">;
def err_mismatched_code_seg_override : Error<
"overriding virtual function must specify the same code segment as its overridden function">;
def err_conflicting_codeseg_attribute : Error<
"conflicting code segment specifiers">;
def warn_duplicate_codeseg_attribute : Warning<
"duplicate code segment specifiers">, InGroup<Section>;
def err_anonymous_property: Error<
"anonymous property is not supported">;
def err_property_is_variably_modified : Error<
"property %0 has a variably modified type">;
def err_no_accessor_for_property : Error<
"no %select{getter|setter}0 defined for property %1">;
def err_cannot_find_suitable_accessor : Error<
"cannot find suitable %select{getter|setter}0 for property %1">;
def warn_alloca : Warning<
"use of function %0 is discouraged; there is no way to check for failure but "
"failure may still occur, resulting in a possibly exploitable security vulnerability">,
InGroup<DiagGroup<"alloca">>, DefaultIgnore;
def warn_alloca_align_alignof : Warning<
"second argument to __builtin_alloca_with_align is supposed to be in bits">,
InGroup<DiagGroup<"alloca-with-align-alignof">>;
def err_alignment_too_small : Error<
"requested alignment must be %0 or greater">;
def err_alignment_too_big : Error<
"requested alignment must be %0 or smaller">;
def err_alignment_not_power_of_two : Error<
"requested alignment is not a power of 2">;
def warn_alignment_not_power_of_two : Warning<
err_alignment_not_power_of_two.Summary>,
InGroup<DiagGroup<"non-power-of-two-alignment">>;
def err_alignment_dependent_typedef_name : Error<
"requested alignment is dependent but declaration is not dependent">;
def warn_alignment_builtin_useless : Warning<
"%select{aligning a value|the result of checking whether a value is aligned}0"
" to 1 byte is %select{a no-op|always true}0">, InGroup<TautologicalCompare>;
def err_attribute_aligned_too_great : Error<
"requested alignment must be %0 bytes or smaller">;
def warn_assume_aligned_too_great
: Warning<"requested alignment must be %0 bytes or smaller; maximum "
"alignment assumed">,
InGroup<DiagGroup<"builtin-assume-aligned-alignment">>;
def warn_not_xl_compatible
: Warning<"alignment of 16 bytes for a struct member is not binary "
"compatible with IBM XL C/C++ for AIX 16.1.0 or older">,
InGroup<AIXCompat>;
def note_misaligned_member_used_here : Note<
"passing byval argument %0 with potentially incompatible alignment here">;
def warn_redeclaration_without_attribute_prev_attribute_ignored : Warning<
"%q0 redeclared without %1 attribute: previous %1 ignored">,
InGroup<MicrosoftInconsistentDllImport>;
def warn_redeclaration_without_import_attribute : Warning<
"%q0 redeclared without 'dllimport' attribute: 'dllexport' attribute added">,
InGroup<MicrosoftInconsistentDllImport>;
def warn_dllimport_dropped_from_inline_function : Warning<
"%q0 redeclared inline; %1 attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
" exception specification; attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_attribute_ignored_on_non_definition :
Warning<"%0 attribute ignored on a non-definition declaration">,
InGroup<IgnoredAttributes>;
def warn_attribute_ignored_on_inline :
Warning<"%0 attribute ignored on inline function">,
InGroup<IgnoredAttributes>;
def warn_nocf_check_attribute_ignored :
Warning<"'nocf_check' attribute ignored; use -fcf-protection to enable the attribute">,
InGroup<IgnoredAttributes>;
def warn_attribute_after_definition_ignored : Warning<
"attribute %0 after definition is ignored">,
InGroup<IgnoredAttributes>;
def warn_attributes_likelihood_ifstmt_conflict
: Warning<"conflicting attributes %0 are ignored">,
InGroup<IgnoredAttributes>;
def warn_cxx11_gnu_attribute_on_type : Warning<
"attribute %0 ignored, because it cannot be applied to a type">,
InGroup<IgnoredAttributes>;
def warn_unhandled_ms_attribute_ignored : Warning<
"__declspec attribute %0 is not supported">,
InGroup<IgnoredAttributes>;
def warn_attribute_has_no_effect_on_infinite_loop : Warning<
"attribute %0 has no effect when annotating an infinite loop">,
InGroup<IgnoredAttributes>;
def note_attribute_has_no_effect_on_infinite_loop_here : Note<
"annotating the infinite loop here">;
def warn_attribute_has_no_effect_on_compile_time_if : Warning<
"attribute %0 has no effect when annotating an 'if %select{constexpr|consteval}1' statement">,
InGroup<IgnoredAttributes>;
def note_attribute_has_no_effect_on_compile_time_if_here : Note<
"annotating the 'if %select{constexpr|consteval}0' statement here">;
def err_decl_attribute_invalid_on_stmt : Error<
"%0 attribute cannot be applied to a statement">;
def err_attribute_invalid_on_decl : Error<
"%0 attribute cannot be applied to a declaration">;
def warn_type_attribute_deprecated_on_decl : Warning<
"applying attribute %0 to a declaration is deprecated; apply it to the type instead">,
InGroup<DeprecatedAttributes>;
def warn_declspec_attribute_ignored : Warning<
"attribute %0 is ignored, place it after "
"\"%select{class|struct|interface|union|enum}1\" to apply attribute to "
"type declaration">, InGroup<IgnoredAttributes>;
def warn_attribute_precede_definition : Warning<
"attribute declaration must precede definition">,
InGroup<IgnoredAttributes>;
def warn_attribute_void_function_method : Warning<
"attribute %0 cannot be applied to "
"%select{functions|Objective-C method}1 without return value">,
InGroup<IgnoredAttributes>;
def warn_attribute_weak_on_field : Warning<
"__weak attribute cannot be specified on a field declaration">,
InGroup<IgnoredAttributes>;
def warn_gc_attribute_weak_on_local : Warning<
"Objective-C GC does not allow weak variables on the stack">,
InGroup<IgnoredAttributes>;
def warn_nsobject_attribute : Warning<
"'NSObject' attribute may be put on a typedef only; attribute is ignored">,
InGroup<NSobjectAttribute>;
def warn_independentclass_attribute : Warning<
"'objc_independent_class' attribute may be put on a typedef only; "
"attribute is ignored">,
InGroup<IndependentClassAttribute>;
def warn_ptr_independentclass_attribute : Warning<
"'objc_independent_class' attribute may be put on Objective-C object "
"pointer type only; attribute is ignored">,
InGroup<IndependentClassAttribute>;
def warn_attribute_weak_on_local : Warning<
"__weak attribute cannot be specified on an automatic variable when ARC "
"is not enabled">,
InGroup<IgnoredAttributes>;
def warn_weak_identifier_undeclared : Warning<
"weak identifier %0 never declared">;
def warn_attribute_cmse_entry_static : Warning<
"'cmse_nonsecure_entry' cannot be applied to functions with internal linkage">,
InGroup<IgnoredAttributes>;
def warn_cmse_nonsecure_union : Warning<
"passing union across security boundary via %select{parameter %1|return value}0 "
"may leak information">,
InGroup<DiagGroup<"cmse-union-leak">>;
def err_attribute_weak_static : Error<
"weak declaration cannot have internal linkage">;
def err_attribute_selectany_non_extern_data : Error<
"'selectany' can only be applied to data items with external linkage">;
def err_declspec_thread_on_thread_variable : Error<
"'__declspec(thread)' applied to variable that already has a "
"thread-local storage specifier">;
def err_attribute_dll_not_extern : Error<
"%q0 must have external linkage when declared %q1">;
def err_attribute_dll_thread_local : Error<
"%q0 cannot be thread local when declared %q1">;
def err_attribute_dll_lambda : Error<
"lambda cannot be declared %0">;
def warn_attribute_invalid_on_definition : Warning<
"'%0' attribute cannot be specified on a definition">,
InGroup<IgnoredAttributes>;
def err_attribute_dll_redeclaration : Error<
"redeclaration of %q0 cannot add %q1 attribute">;
def warn_attribute_dll_redeclaration : Warning<
"redeclaration of %q0 should not add %q1 attribute">,
InGroup<DiagGroup<"dll-attribute-on-redeclaration">>;
def err_attribute_dllimport_function_definition : Error<
"dllimport cannot be applied to non-inline function definition">;
def err_attribute_dllimport_function_specialization_definition : Error<
"cannot define non-inline dllimport template specialization">;
def err_attribute_dll_deleted : Error<
"attribute %q0 cannot be applied to a deleted function">;
def err_attribute_dllimport_data_definition : Error<
"definition of dllimport data">;
def err_attribute_dllimport_static_field_definition : Error<
"definition of dllimport static field not allowed">;
def warn_attribute_dllimport_static_field_definition : Warning<
"definition of dllimport static field">,
InGroup<DiagGroup<"dllimport-static-field-def">>;
def warn_attribute_dllexport_explicit_instantiation_decl : Warning<
"explicit instantiation declaration should not be 'dllexport'">,
InGroup<DllexportExplicitInstantiationDecl>;
def warn_attribute_dllexport_explicit_instantiation_def : Warning<
"'dllexport' attribute ignored on explicit instantiation definition">,
InGroup<IgnoredAttributes>;
def warn_invalid_initializer_from_system_header : Warning<
"invalid constructor from class in system header, should not be explicit">,
InGroup<DiagGroup<"invalid-initializer-from-system-header">>;
def note_used_in_initialization_here : Note<"used in initialization here">;
def err_attribute_dll_member_of_dll_class : Error<
"attribute %q0 cannot be applied to member of %q1 class">;
def warn_attribute_dll_instantiated_base_class : Warning<
"propagating dll attribute to %select{already instantiated|explicitly specialized}0 "
"base class template without dll attribute is not supported">,
InGroup<DiagGroup<"unsupported-dll-base-class-template">>, DefaultIgnore;
def err_attribute_dll_ambiguous_default_ctor : Error<
"'__declspec(dllexport)' cannot be applied to more than one default constructor in %0">;
def err_attribute_weakref_not_static : Error<
"weakref declaration must have internal linkage">;
def err_attribute_weakref_not_global_context : Error<
"weakref declaration of %0 must be in a global context">;
def err_attribute_weakref_without_alias : Error<
"weakref declaration of %0 must also have an alias attribute">;
def err_alias_not_supported_on_darwin : Error <
"aliases are not supported on darwin">;
def warn_attribute_wrong_decl_type_str : Warning<
"%0 attribute only applies to %1">, InGroup<IgnoredAttributes>;
def err_attribute_wrong_decl_type_str : Error<
warn_attribute_wrong_decl_type_str.Summary>;
def warn_attribute_wrong_decl_type : Warning<
"%0 attribute only applies to %select{"
"functions"
"|unions"
"|variables and functions"
"|functions and methods"
"|functions, methods and blocks"
"|functions, methods, and parameters"
"|variables"
"|variables and fields"
"|variables, data members and tag types"
"|types and namespaces"
"|variables, functions and classes"
"|kernel functions"
"|non-K&R-style functions}1">,
InGroup<IgnoredAttributes>;
def err_attribute_wrong_decl_type : Error<warn_attribute_wrong_decl_type.Summary>;
def warn_type_attribute_wrong_type : Warning<
"'%0' only applies to %select{function|pointer|"
"Objective-C object or block pointer}1 types; type here is %2">,
InGroup<IgnoredAttributes>;
def warn_incomplete_encoded_type : Warning<
"encoding of %0 type is incomplete because %1 component has unknown encoding">,
InGroup<DiagGroup<"encode-type">>;
def warn_gnu_inline_attribute_requires_inline : Warning<
"'gnu_inline' attribute requires function to be marked 'inline',"
" attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_gnu_inline_cplusplus_without_extern : Warning<
"'gnu_inline' attribute without 'extern' in C++ treated as externally"
" available, this changed in Clang 10">,
InGroup<DiagGroup<"gnu-inline-cpp-without-extern">>;
def err_attribute_vecreturn_only_vector_member : Error<
"the vecreturn attribute can only be used on a class or structure with one member, which must be a vector">;
def err_attribute_vecreturn_only_pod_record : Error<
"the vecreturn attribute can only be used on a POD (plain old data) class or structure (i.e. no virtual functions)">;
def err_cconv_change : Error<
"function declared '%0' here was previously declared "
"%select{'%2'|without calling convention}1">;
def warn_cconv_unsupported : Warning<
"%0 calling convention is not supported %select{"
// Use CallingConventionIgnoredReason Enum to specify these.
"for this target"
"|on variadic function"
"|on constructor/destructor"
"|on builtin function"
"}1">,
InGroup<IgnoredAttributes>;
def error_cconv_unsupported : Error<warn_cconv_unsupported.Summary>;
def err_cconv_knr : Error<
"function with no prototype cannot use the %0 calling convention">;
def warn_cconv_knr : Warning<
err_cconv_knr.Summary>,
InGroup<DiagGroup<"missing-prototype-for-cc">>;
def err_cconv_varargs : Error<
"variadic function cannot use %0 calling convention">;
def err_regparm_mismatch : Error<"function declared with regparm(%0) "
"attribute was previously declared "
"%plural{0:without the regparm|:with the regparm(%1)}1 attribute">;
def err_function_attribute_mismatch : Error<
"function declared with %0 attribute "
"was previously declared without the %0 attribute">;
def err_objc_precise_lifetime_bad_type : Error<
"objc_precise_lifetime only applies to retainable types; type here is %0">;
def warn_objc_precise_lifetime_meaningless : Error<
"objc_precise_lifetime is not meaningful for "
"%select{__unsafe_unretained|__autoreleasing}0 objects">;
def err_invalid_pcs : Error<"invalid PCS type">;
def warn_attribute_not_on_decl : Warning<
"%0 attribute ignored when parsing type">, InGroup<IgnoredAttributes>;
def err_base_specifier_attribute : Error<
"%0 attribute cannot be applied to a base specifier">;
def warn_declspec_allocator_nonpointer : Warning<
"ignoring __declspec(allocator) because the function return type %0 is not "
"a pointer or reference type">, InGroup<IgnoredAttributes>;
def err_cconv_incomplete_param_type : Error<
"parameter %0 must have a complete type to use function %1 with the %2 "
"calling convention">;
def err_attribute_output_parameter : Error<
"attribute only applies to output parameters">;
def ext_cannot_use_trivial_abi : ExtWarn<
"'trivial_abi' cannot be applied to %0">, InGroup<IgnoredAttributes>;
def note_cannot_use_trivial_abi_reason : Note<
"'trivial_abi' is disallowed on %0 because %select{"
"its copy constructors and move constructors are all deleted|"
"it is polymorphic|"
"it has a base of a non-trivial class type|it has a virtual base|"
"it has a __weak field|it has a field of a non-trivial class type}1">;
// Availability attribute
def warn_availability_unknown_platform : Warning<
"unknown platform %0 in availability macro">, InGroup<Availability>;
def warn_availability_version_ordering : Warning<
"feature cannot be %select{introduced|deprecated|obsoleted}0 in %1 version "
"%2 before it was %select{introduced|deprecated|obsoleted}3 in version %4; "
"attribute ignored">, InGroup<Availability>;
def warn_mismatched_availability: Warning<
"availability does not match previous declaration">, InGroup<Availability>;
def warn_mismatched_availability_override : Warning<
"%select{|overriding }4method %select{introduced after|"
"deprecated before|obsoleted before}0 "
"%select{the protocol method it implements|overridden method}4 "
"on %1 (%2 vs. %3)">, InGroup<Availability>;
def warn_mismatched_availability_override_unavail : Warning<
"%select{|overriding }1method cannot be unavailable on %0 when "
"%select{the protocol method it implements|its overridden method}1 is "
"available">,
InGroup<Availability>;
def warn_availability_on_static_initializer : Warning<
"ignoring availability attribute %select{on '+load' method|"
"with constructor attribute|with destructor attribute}0">,
InGroup<Availability>;
def note_overridden_method : Note<
"overridden method is here">;
def warn_availability_swift_unavailable_deprecated_only : Warning<
"only 'unavailable' and 'deprecated' are supported for Swift availability">,
InGroup<Availability>;
def note_protocol_method : Note<
"protocol method is here">;
def warn_availability_fuchsia_unavailable_minor : Warning<
"Fuchsia API Level prohibits specifying a minor or sub-minor version">,
InGroup<Availability>;
def warn_unguarded_availability :
Warning<"%0 is only available on %1 %2 or newer">,
InGroup<UnguardedAvailability>, DefaultIgnore;
def warn_unguarded_availability_new :
Warning<warn_unguarded_availability.Summary>,
InGroup<UnguardedAvailabilityNew>;
def note_decl_unguarded_availability_silence : Note<
"annotate %select{%1|anonymous %1}0 with an availability attribute to silence this warning">;
def note_unguarded_available_silence : Note<
"enclose %0 in %select{an @available|a __builtin_available}1 check to silence"
" this warning">;
def warn_at_available_unchecked_use : Warning<
"%select{@available|__builtin_available}0 does not guard availability here; "
"use if (%select{@available|__builtin_available}0) instead">,
InGroup<DiagGroup<"unsupported-availability-guard">>;
def warn_missing_sdksettings_for_availability_checking : Warning<
"%0 availability is ignored without a valid 'SDKSettings.json' in the SDK">,
InGroup<DiagGroup<"ignored-availability-without-sdk-settings">>;
// Thread Safety Attributes
def warn_thread_attribute_ignored : Warning<
"ignoring %0 attribute because its argument is invalid">,
InGroup<ThreadSafetyAttributes>, DefaultIgnore;
def warn_thread_attribute_not_on_non_static_member : Warning<
"%0 attribute without capability arguments can only be applied to non-static "
"methods of a class">,
InGroup<ThreadSafetyAttributes>, DefaultIgnore;
def warn_thread_attribute_not_on_capability_member : Warning<
"%0 attribute without capability arguments refers to 'this', but %1 isn't "
"annotated with 'capability' or 'scoped_lockable' attribute">,
InGroup<ThreadSafetyAttributes>, DefaultIgnore;
def warn_thread_attribute_argument_not_lockable : Warning<
"%0 attribute requires arguments whose type is annotated "
"with 'capability' attribute; type here is %1">,
InGroup<ThreadSafetyAttributes>, DefaultIgnore;
def warn_thread_attribute_decl_not_lockable : Warning<
"%0 attribute can only be applied in a context annotated "
"with 'capability' attribute">,
InGroup<ThreadSafetyAttributes>, DefaultIgnore;
def warn_thread_attribute_decl_not_pointer : Warning<
"%0 only applies to pointer types; type here is %1">,
InGroup<ThreadSafetyAttributes>, DefaultIgnore;
def err_attribute_argument_out_of_bounds_extra_info : Error<
"%0 attribute parameter %1 is out of bounds: "
"%plural{0:no parameters to index into|"
"1:can only be 1, since there is one parameter|"
":must be between 1 and %2}2">;
// Thread Safety Analysis
def warn_unlock_but_no_lock : Warning<"releasing %0 '%1' that was not held">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_unlock_kind_mismatch : Warning<
"releasing %0 '%1' using %select{shared|exclusive}2 access, expected "
"%select{shared|exclusive}3 access">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_double_lock : Warning<"acquiring %0 '%1' that is already held">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_no_unlock : Warning<
"%0 '%1' is still held at the end of function">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_expecting_locked : Warning<
"expecting %0 '%1' to be held at the end of function">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
// FIXME: improve the error message about locks not in scope
def warn_lock_some_predecessors : Warning<
"%0 '%1' is not held on every path through here">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_expecting_lock_held_on_loop : Warning<
"expecting %0 '%1' to be held at start of each loop">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def note_locked_here : Note<"%0 acquired here">;
def note_unlocked_here : Note<"%0 released here">;
def warn_lock_exclusive_and_shared : Warning<
"%0 '%1' is acquired exclusively and shared in the same scope">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def note_lock_exclusive_and_shared : Note<
"the other acquisition of %0 '%1' is here">;
def warn_variable_requires_any_lock : Warning<
"%select{reading|writing}1 variable %0 requires holding "
"%select{any mutex|any mutex exclusively}1">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_var_deref_requires_any_lock : Warning<
"%select{reading|writing}1 the value pointed to by %0 requires holding "
"%select{any mutex|any mutex exclusively}1">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_fun_excludes_mutex : Warning<
"cannot call function '%1' while %0 '%2' is held">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_cannot_resolve_lock : Warning<
"cannot resolve lock expression">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_acquired_before : Warning<
"%0 '%1' must be acquired before '%2'">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_acquired_before_after_cycle : Warning<
"Cycle in acquired_before/after dependencies, starting with '%0'">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
// Thread safety warnings negative capabilities
def warn_acquire_requires_negative_cap : Warning<
"acquiring %0 '%1' requires negative capability '%2'">,
InGroup<ThreadSafetyNegative>, DefaultIgnore;
def warn_fun_requires_negative_cap : Warning<
"calling function %0 requires negative capability '%1'">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
// Thread safety warnings on pass by reference
def warn_guarded_pass_by_reference : Warning<
"passing variable %1 by reference requires holding %0 "
"%select{'%2'|'%2' exclusively}3">,
InGroup<ThreadSafetyReference>, DefaultIgnore;
def warn_pt_guarded_pass_by_reference : Warning<
"passing the value that %1 points to by reference requires holding %0 "
"%select{'%2'|'%2' exclusively}3">,
InGroup<ThreadSafetyReference>, DefaultIgnore;
// Imprecise thread safety warnings
def warn_variable_requires_lock : Warning<
"%select{reading|writing}3 variable %1 requires holding %0 "
"%select{'%2'|'%2' exclusively}3">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_var_deref_requires_lock : Warning<
"%select{reading|writing}3 the value pointed to by %1 requires "
"holding %0 %select{'%2'|'%2' exclusively}3">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
def warn_fun_requires_lock : Warning<
"calling function %1 requires holding %0 %select{'%2'|'%2' exclusively}3">,
InGroup<ThreadSafetyAnalysis>, DefaultIgnore;
// Precise thread safety warnings
def warn_variable_requires_lock_precise :
Warning<warn_variable_requires_lock.Summary>,
InGroup<ThreadSafetyPrecise>, DefaultIgnore;
def warn_var_deref_requires_lock_precise :
Warning<warn_var_deref_requires_lock.Summary>,
InGroup<ThreadSafetyPrecise>, DefaultIgnore;
def warn_fun_requires_lock_precise :
Warning<warn_fun_requires_lock.Summary>,
InGroup<ThreadSafetyPrecise>, DefaultIgnore;
def note_found_mutex_near_match : Note<"found near match '%0'">;
// Verbose thread safety warnings
def warn_thread_safety_verbose : Warning<"thread safety verbose warning">,
InGroup<ThreadSafetyVerbose>, DefaultIgnore;
def note_thread_warning_in_fun : Note<"thread warning in function %0">;
def note_guarded_by_declared_here : Note<"guarded_by declared here">;
// Dummy warning that will trigger "beta" warnings from the analysis if enabled.
def warn_thread_safety_beta : Warning<"thread safety beta warning">,
InGroup<ThreadSafetyBeta>, DefaultIgnore;
// Consumed warnings
def warn_use_in_invalid_state : Warning<
"invalid invocation of method '%0' on object '%1' while it is in the '%2' "
"state">, InGroup<Consumed>, DefaultIgnore;
def warn_use_of_temp_in_invalid_state : Warning<
"invalid invocation of method '%0' on a temporary object while it is in the "
"'%1' state">, InGroup<Consumed>, DefaultIgnore;
def warn_attr_on_unconsumable_class : Warning<
"consumed analysis attribute is attached to member of class %0 which isn't "
"marked as consumable">, InGroup<Consumed>, DefaultIgnore;
def warn_return_typestate_for_unconsumable_type : Warning<
"return state set for an unconsumable type '%0'">, InGroup<Consumed>,
DefaultIgnore;
def warn_return_typestate_mismatch : Warning<
"return value not in expected state; expected '%0', observed '%1'">,
InGroup<Consumed>, DefaultIgnore;
def warn_loop_state_mismatch : Warning<
"state of variable '%0' must match at the entry and exit of loop">,
InGroup<Consumed>, DefaultIgnore;
def warn_param_return_typestate_mismatch : Warning<
"parameter '%0' not in expected state when the function returns: expected "
"'%1', observed '%2'">, InGroup<Consumed>, DefaultIgnore;
def warn_param_typestate_mismatch : Warning<
"argument not in expected state; expected '%0', observed '%1'">,
InGroup<Consumed>, DefaultIgnore;
// no_sanitize attribute
def warn_unknown_sanitizer_ignored : Warning<
"unknown sanitizer '%0' ignored">, InGroup<UnknownSanitizers>;
def warn_impcast_vector_scalar : Warning<
"implicit conversion turns vector to scalar: %0 to %1">,
InGroup<Conversion>, DefaultIgnore;
def warn_impcast_complex_scalar : Warning<
"implicit conversion discards imaginary component: %0 to %1">,
InGroup<Conversion>, DefaultIgnore;
def err_impcast_complex_scalar : Error<
"implicit conversion from %0 to %1 is not permitted in C++">;
def warn_impcast_float_precision : Warning<
"implicit conversion loses floating-point precision: %0 to %1">,
InGroup<ImplicitFloatConversion>, DefaultIgnore;
def warn_impcast_float_result_precision : Warning<
"implicit conversion when assigning computation result loses floating-point precision: %0 to %1">,
InGroup<ImplicitFloatConversion>, DefaultIgnore;
def warn_impcast_double_promotion : Warning<
"implicit conversion increases floating-point precision: %0 to %1">,
InGroup<DoublePromotion>, DefaultIgnore;
def warn_impcast_integer_sign : Warning<
"implicit conversion changes signedness: %0 to %1">,
InGroup<SignConversion>, DefaultIgnore;
def warn_impcast_integer_sign_conditional : Warning<
"operand of ? changes signedness: %0 to %1">,
InGroup<SignConversion>, DefaultIgnore;
def warn_impcast_integer_precision : Warning<
"implicit conversion loses integer precision: %0 to %1">,
InGroup<ImplicitIntConversion>, DefaultIgnore;
def warn_impcast_high_order_zero_bits : Warning<
"higher order bits are zeroes after implicit conversion">,
InGroup<ImplicitIntConversion>, DefaultIgnore;
def warn_impcast_nonnegative_result : Warning<
"the resulting value is always non-negative after implicit conversion">,
InGroup<SignConversion>, DefaultIgnore;
def warn_impcast_integer_64_32 : Warning<
"implicit conversion loses integer precision: %0 to %1">,
InGroup<Shorten64To32>, DefaultIgnore;
def warn_impcast_integer_precision_constant : Warning<
"implicit conversion from %2 to %3 changes value from %0 to %1">,
InGroup<ConstantConversion>;
def warn_impcast_single_bit_bitield_precision_constant : Warning<
"implicit truncation from %2 to a one-bit wide bit-field changes value from "
"%0 to %1">, InGroup<SingleBitBitFieldConstantConversion>;
def warn_impcast_bitfield_precision_constant : Warning<
"implicit truncation from %2 to bit-field changes value from %0 to %1">,
InGroup<BitFieldConstantConversion>;
def warn_impcast_constant_value_to_objc_bool : Warning<
"implicit conversion from constant value %0 to 'BOOL'; "
"the only well defined values for 'BOOL' are YES and NO">,
InGroup<ObjCBoolConstantConversion>;
def warn_impcast_fixed_point_range : Warning<
"implicit conversion from %0 cannot fit within the range of values for %1">,
InGroup<ImplicitFixedPointConversion>;
def warn_impcast_literal_float_to_integer : Warning<
"implicit conversion from %0 to %1 changes value from %2 to %3">,
InGroup<LiteralConversion>;
def warn_impcast_literal_float_to_integer_out_of_range : Warning<
"implicit conversion of out of range value from %0 to %1 is undefined">,
InGroup<LiteralConversion>;
def warn_impcast_float_integer : Warning<
"implicit conversion turns floating-point number into integer: %0 to %1">,
InGroup<FloatConversion>, DefaultIgnore;
def warn_impcast_float_to_objc_signed_char_bool : Warning<
"implicit conversion from floating-point type %0 to 'BOOL'">,
InGroup<ObjCSignedCharBoolImplicitFloatConversion>;
def warn_impcast_int_to_objc_signed_char_bool : Warning<
"implicit conversion from integral type %0 to 'BOOL'">,
InGroup<ObjCSignedCharBoolImplicitIntConversion>, DefaultIgnore;
// Implicit int -> float conversion precision loss warnings.
def warn_impcast_integer_float_precision : Warning<
"implicit conversion from %0 to %1 may lose precision">,
InGroup<ImplicitIntFloatConversion>, DefaultIgnore;
def warn_impcast_integer_float_precision_constant : Warning<
"implicit conversion from %2 to %3 changes value from %0 to %1">,
InGroup<ImplicitConstIntFloatConversion>;
def warn_impcast_float_to_integer : Warning<
"implicit conversion from %0 to %1 changes value from %2 to %3">,
InGroup<FloatOverflowConversion>, DefaultIgnore;
def warn_impcast_float_to_integer_out_of_range : Warning<
"implicit conversion of out of range value from %0 to %1 is undefined">,
InGroup<FloatOverflowConversion>, DefaultIgnore;
def warn_impcast_float_to_integer_zero : Warning<
"implicit conversion from %0 to %1 changes non-zero value from %2 to %3">,
InGroup<FloatZeroConversion>, DefaultIgnore;
def warn_impcast_string_literal_to_bool : Warning<
"implicit conversion turns string literal into bool: %0 to %1">,
InGroup<StringConversion>, DefaultIgnore;
def warn_impcast_different_enum_types : Warning<
"implicit conversion from enumeration type %0 to different enumeration type "
"%1">, InGroup<EnumConversion>;
def warn_impcast_bool_to_null_pointer : Warning<
"initialization of pointer of type %0 to null from a constant boolean "
"expression">, InGroup<BoolConversion>;
def warn_non_literal_null_pointer : Warning<
"expression which evaluates to zero treated as a null pointer constant of "
"type %0">, InGroup<NonLiteralNullConversion>;
def warn_pointer_compare : Warning<
"comparing a pointer to a null character constant; did you mean "
"to compare to %select{NULL|(void *)0}0?">,
InGroup<DiagGroup<"pointer-compare">>;
def warn_impcast_null_pointer_to_integer : Warning<
"implicit conversion of %select{NULL|nullptr}0 constant to %1">,
InGroup<NullConversion>;
def warn_impcast_floating_point_to_bool : Warning<
"implicit conversion turns floating-point number into bool: %0 to %1">,
InGroup<ImplicitConversionFloatingPointToBool>;
def ext_ms_impcast_fn_obj : ExtWarn<
"implicit conversion between pointer-to-function and pointer-to-object is a "
"Microsoft extension">, InGroup<MicrosoftCast>;
def warn_impcast_pointer_to_bool : Warning<
"address of%select{| function| array}0 '%1' will always evaluate to "
"'true'">,
InGroup<PointerBoolConversion>;
def warn_cast_nonnull_to_bool : Warning<
"nonnull %select{function call|parameter}0 '%1' will evaluate to "
"'true' on first encounter">,
InGroup<PointerBoolConversion>;
def warn_this_bool_conversion : Warning<
"'this' pointer cannot be null in well-defined C++ code; pointer may be "
"assumed to always convert to true">, InGroup<UndefinedBoolConversion>;
def warn_address_of_reference_bool_conversion : Warning<
"reference cannot be bound to dereferenced null pointer in well-defined C++ "
"code; pointer may be assumed to always convert to true">,
InGroup<UndefinedBoolConversion>;
def warn_xor_used_as_pow : Warning<
"result of '%0' is %1; did you mean exponentiation?">,
InGroup<XorUsedAsPow>;
def warn_xor_used_as_pow_base_extra : Warning<
"result of '%0' is %1; did you mean '%2' (%3)?">,
InGroup<XorUsedAsPow>;
def warn_xor_used_as_pow_base : Warning<
"result of '%0' is %1; did you mean '%2'?">,
InGroup<XorUsedAsPow>;
def note_xor_used_as_pow_silence : Note<
"replace expression with '%0' %select{|or use 'xor' instead of '^' }1to silence this warning">;
def warn_null_pointer_compare : Warning<
"comparison of %select{address of|function|array}0 '%1' %select{not |}2"
"equal to a null pointer is always %select{true|false}2">,
InGroup<TautologicalPointerCompare>;
def warn_nonnull_expr_compare : Warning<
"comparison of nonnull %select{function call|parameter}0 '%1' "
"%select{not |}2equal to a null pointer is '%select{true|false}2' on first "
"encounter">,
InGroup<TautologicalPointerCompare>;
def warn_this_null_compare : Warning<
"'this' pointer cannot be null in well-defined C++ code; comparison may be "
"assumed to always evaluate to %select{true|false}0">,
InGroup<TautologicalUndefinedCompare>;
def warn_address_of_reference_null_compare : Warning<
"reference cannot be bound to dereferenced null pointer in well-defined C++ "
"code; comparison may be assumed to always evaluate to "
"%select{true|false}0">,
InGroup<TautologicalUndefinedCompare>;
def note_reference_is_return_value : Note<"%0 returns a reference">;
def note_pointer_declared_here : Note<
"pointer %0 declared here">;
def warn_division_sizeof_ptr : Warning<
"'%0' will return the size of the pointer, not the array itself">,
InGroup<DiagGroup<"sizeof-pointer-div">>;
def warn_division_sizeof_array : Warning<
"expression does not compute the number of elements in this array; element "
"type is %0, not %1">,
InGroup<DiagGroup<"sizeof-array-div">>;
def note_function_warning_silence : Note<
"prefix with the address-of operator to silence this warning">;
def note_function_to_function_call : Note<
"suffix with parentheses to turn this into a function call">;
def warn_impcast_objective_c_literal_to_bool : Warning<
"implicit boolean conversion of Objective-C object literal always "
"evaluates to true">,
InGroup<ObjCLiteralConversion>;
def warn_cast_align : Warning<
"cast from %0 to %1 increases required alignment from %2 to %3">,
InGroup<CastAlign>, DefaultIgnore;
def warn_old_style_cast : Warning<
"use of old-style cast">, InGroup<OldStyleCast>, DefaultIgnore,
SuppressInSystemMacro;
// Separate between casts to void* and non-void* pointers.
// Some APIs use (abuse) void* for something like a user context,
// and often that value is an integer even if it isn't a pointer itself.
// Having a separate warning flag allows users to control the warning
// for their workflow.
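// Illustrative only (hypothetical snippet): with 32-bit 'int' and 64-bit
// pointers, 'char *p = (char *)i;' is reported under -Wint-to-pointer-cast,
// while 'void *ctx = (void *)i;' is reported under the separately
// controllable -Wint-to-void-pointer-cast.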
def warn_int_to_pointer_cast : Warning<
"cast to %1 from smaller integer type %0">,
InGroup<IntToPointerCast>;
def warn_int_to_void_pointer_cast : Warning<
"cast to %1 from smaller integer type %0">,
InGroup<IntToVoidPointerCast>;
def warn_pointer_to_int_cast : Warning<
"cast to smaller integer type %1 from %0">,
InGroup<PointerToIntCast>;
def warn_pointer_to_enum_cast : Warning<
warn_pointer_to_int_cast.Summary>,
InGroup<PointerToEnumCast>;
def warn_void_pointer_to_int_cast : Warning<
"cast to smaller integer type %1 from %0">,
InGroup<VoidPointerToIntCast>;
def warn_void_pointer_to_enum_cast : Warning<
warn_void_pointer_to_int_cast.Summary>,
InGroup<VoidPointerToEnumCast>;
def warn_attribute_ignored_for_field_of_type : Warning<
"%0 attribute ignored for field of type %1">,
InGroup<IgnoredAttributes>;
def warn_no_underlying_type_specified_for_enum_bitfield : Warning<
"enums in the Microsoft ABI are signed integers by default; consider giving "
"the enum %0 an unsigned underlying type to make this code portable">,
InGroup<SignedEnumBitfield>, DefaultIgnore;
def warn_attribute_packed_for_bitfield : Warning<
"'packed' attribute was ignored on bit-fields with single-byte alignment "
"in older versions of GCC and Clang">,
InGroup<DiagGroup<"attribute-packed-for-bitfield">>;
def warn_transparent_union_attribute_field_size_align : Warning<
"%select{alignment|size}0 of field %1 (%2 bits) does not match the "
"%select{alignment|size}0 of the first field in transparent union; "
"transparent_union attribute ignored">,
InGroup<IgnoredAttributes>;
def note_transparent_union_first_field_size_align : Note<
"%select{alignment|size}0 of first field is %1 bits">;
def warn_transparent_union_attribute_not_definition : Warning<
"transparent_union attribute can only be applied to a union definition; "
"attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_transparent_union_attribute_floating : Warning<
"first field of a transparent union cannot have %select{floating point|"
"vector}0 type %1; transparent_union attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_transparent_union_attribute_zero_fields : Warning<
"transparent union definition must contain at least one field; "
"transparent_union attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_attribute_type_not_supported : Warning<
"%0 attribute argument not supported: %1">,
InGroup<IgnoredAttributes>;
def warn_attribute_type_not_supported_global : Warning<
"%0 attribute argument '%1' not supported on a global variable">,
InGroup<IgnoredAttributes>;
def warn_attribute_unknown_visibility : Warning<"unknown visibility %0">,
InGroup<IgnoredAttributes>;
def warn_attribute_protected_visibility :
Warning<"target does not support 'protected' visibility; using 'default'">,
InGroup<DiagGroup<"unsupported-visibility">>;
def err_mismatched_visibility: Error<"visibility does not match previous declaration">;
def note_previous_attribute : Note<"previous attribute is here">;
def note_conflicting_attribute : Note<"conflicting attribute is here">;
def note_attribute : Note<"attribute is here">;
def err_mismatched_ms_inheritance : Error<
"inheritance model does not match %select{definition|previous declaration}0">;
def warn_ignored_ms_inheritance : Warning<
"inheritance model ignored on %select{primary template|partial specialization}0">,
InGroup<IgnoredAttributes>;
def note_previous_ms_inheritance : Note<
"previous inheritance model specified here">;
def err_machine_mode : Error<"%select{unknown|unsupported}0 machine mode %1">;
def err_mode_not_primitive : Error<
"mode attribute only supported for integer and floating-point types">;
def err_mode_wrong_type : Error<
"type of machine mode does not match type of base type">;
def warn_vector_mode_deprecated : Warning<
"specifying vector types with the 'mode' attribute is deprecated; "
"use the 'vector_size' attribute instead">,
InGroup<DeprecatedAttributes>;
def warn_deprecated_noreturn_spelling : Warning<
"the '[[_Noreturn]]' attribute spelling is deprecated in C2x; use "
"'[[noreturn]]' instead">, InGroup<DeprecatedAttributes>;
def err_complex_mode_vector_type : Error<
"type of machine mode does not support base vector types">;
def err_enum_mode_vector_type : Error<
"mode %0 is not supported for enumeration types">;
def warn_attribute_nonnull_no_pointers : Warning<
"'nonnull' attribute applied to function with no pointer arguments">,
InGroup<IgnoredAttributes>;
def warn_attribute_nonnull_parm_no_args : Warning<
"'nonnull' attribute when used on parameters takes no arguments">,
InGroup<IgnoredAttributes>;
def warn_function_stmt_attribute_precedence : Warning<
"statement attribute %0 has higher precedence than function attribute "
"'%select{always_inline|flatten|noinline}1'">,
InGroup<IgnoredAttributes>;
def note_declared_nonnull : Note<
"declared %select{'returns_nonnull'|'nonnull'}0 here">;
def warn_attribute_sentinel_named_arguments : Warning<
"'sentinel' attribute requires named arguments">,
InGroup<IgnoredAttributes>;
def warn_attribute_sentinel_not_variadic : Warning<
"'sentinel' attribute only supported for variadic %select{functions|blocks}0">,
InGroup<IgnoredAttributes>;
def warn_deprecated_ignored_on_using : Warning<
"%0 currently has no effect on a using declaration">,
InGroup<IgnoredAttributes>;
def err_attribute_sentinel_less_than_zero : Error<
"'sentinel' parameter 1 less than zero">;
def err_attribute_sentinel_not_zero_or_one : Error<
"'sentinel' parameter 2 not 0 or 1">;
def warn_cleanup_ext : Warning<
"GCC does not allow the 'cleanup' attribute argument to be anything other "
"than a simple identifier">,
InGroup<GccCompat>;
def err_attribute_cleanup_arg_not_function : Error<
"'cleanup' argument %select{|%1 |%1 }0is not a %select{||single }0function">;
def err_attribute_cleanup_func_must_take_one_arg : Error<
"'cleanup' function %0 must take 1 parameter">;
def err_attribute_cleanup_func_arg_incompatible_type : Error<
"'cleanup' function %0 parameter has "
"%diff{type $ which is incompatible with type $|incompatible type}1,2">;
def err_attribute_regparm_wrong_platform : Error<
"'regparm' is not valid on this platform">;
def err_attribute_regparm_invalid_number : Error<
"'regparm' parameter must be between 0 and %0 inclusive">;
def err_attribute_not_supported_in_lang : Error<
"%0 attribute is not supported in %select{C|C++|Objective-C}1">;
def err_attribute_not_supported_on_arch
: Error<"%0 attribute is not supported on '%1'">;
def warn_gcc_ignores_type_attr : Warning<
"GCC does not allow the %0 attribute to be written on a type">,
InGroup<GccCompat>;
def warn_gcc_requires_variadic_function : Warning<
"GCC requires a function with the %0 attribute to be variadic">,
InGroup<GccCompat>;
// Clang-Specific Attributes
def warn_attribute_iboutlet : Warning<
"%0 attribute can only be applied to instance variables or properties">,
InGroup<IgnoredAttributes>;
def err_iboutletcollection_type : Error<
"invalid type %0 as argument of iboutletcollection attribute">;
def err_iboutletcollection_builtintype : Error<
"type argument of iboutletcollection attribute cannot be a builtin type">;
def warn_iboutlet_object_type : Warning<
"%select{instance variable|property}2 with %0 attribute must "
"be an object type (invalid %1)">, InGroup<ObjCInvalidIBOutletProperty>;
def warn_iboutletcollection_property_assign : Warning<
"IBOutletCollection properties should be copy/strong and not assign">,
InGroup<ObjCInvalidIBOutletProperty>;
def err_attribute_overloadable_mismatch : Error<
"redeclaration of %0 must %select{not |}1have the 'overloadable' attribute">;
def note_attribute_overloadable_prev_overload : Note<
"previous %select{unmarked |}0overload of function is here">;
def err_attribute_overloadable_no_prototype : Error<
"'overloadable' function %0 must have a prototype">;
def err_attribute_overloadable_multiple_unmarked_overloads : Error<
"at most one overload for a given name may lack the 'overloadable' "
"attribute">;
def warn_attribute_no_builtin_invalid_builtin_name : Warning<
"'%0' is not a valid builtin name for %1">,
InGroup<DiagGroup<"invalid-no-builtin-names">>;
def err_attribute_no_builtin_wildcard_or_builtin_name : Error<
"empty %0 cannot be composed with named ones">;
def err_attribute_no_builtin_on_non_definition : Error<
"%0 attribute is permitted on definitions only">;
def err_attribute_no_builtin_on_defaulted_deleted_function : Error<
"%0 attribute has no effect on defaulted or deleted functions">;
def warn_ns_attribute_wrong_return_type : Warning<
"%0 attribute only applies to %select{functions|methods|properties}1 that "
"return %select{an Objective-C object|a pointer|a non-retainable pointer}2">,
InGroup<IgnoredAttributes>;
def err_ns_attribute_wrong_parameter_type : Error<
"%0 attribute only applies to "
"%select{Objective-C object|pointer|pointer-to-CF-pointer}1 parameters">;
def warn_ns_attribute_wrong_parameter_type : Warning<
"%0 attribute only applies to "
"%select{Objective-C object|pointer|pointer-to-CF-pointer|pointer/reference-to-OSObject-pointer}1 parameters">,
InGroup<IgnoredAttributes>;
def warn_objc_requires_super_protocol : Warning<
"%0 attribute cannot be applied to %select{methods in protocols|dealloc}1">,
InGroup<DiagGroup<"requires-super-attribute">>;
def note_protocol_decl : Note<
"protocol is declared here">;
def note_protocol_decl_undefined : Note<
"protocol %0 has no definition">;
def err_attribute_preferred_name_arg_invalid : Error<
"argument %0 to 'preferred_name' attribute is not a typedef for "
"a specialization of %1">;
def err_attribute_builtin_alias : Error<
"%0 attribute can only be applied to a ARM, HLSL or RISC-V builtin">;
// called-once attribute diagnostics.
def err_called_once_attribute_wrong_type : Error<
"'called_once' attribute only applies to function-like parameters">;
def warn_completion_handler_never_called : Warning<
"%select{|captured }1completion handler is never called">,
InGroup<CompletionHandler>, DefaultIgnore;
def warn_called_once_never_called : Warning<
"%select{|captured }1%0 parameter marked 'called_once' is never called">,
InGroup<CalledOnceParameter>;
def warn_completion_handler_never_called_when : Warning<
"completion handler is never %select{used|called}1 when "
"%select{taking true branch|taking false branch|"
"handling this case|none of the cases applies|"
"entering the loop|skipping the loop|taking one of the branches}2">,
InGroup<CompletionHandler>, DefaultIgnore;
def warn_called_once_never_called_when : Warning<
"%0 parameter marked 'called_once' is never %select{used|called}1 when "
"%select{taking true branch|taking false branch|"
"handling this case|none of the cases applies|"
"entering the loop|skipping the loop|taking one of the branches}2">,
InGroup<CalledOnceParameter>;
def warn_completion_handler_called_twice : Warning<
"completion handler is called twice">,
InGroup<CompletionHandler>, DefaultIgnore;
def warn_called_once_gets_called_twice : Warning<
"%0 parameter marked 'called_once' is called twice">,
InGroup<CalledOnceParameter>;
def note_called_once_gets_called_twice : Note<
"previous call is here%select{; set to nil to indicate "
"it cannot be called afterwards|}0">;
// objc_designated_initializer attribute diagnostics.
def warn_objc_designated_init_missing_super_call : Warning<
"designated initializer missing a 'super' call to a designated initializer of the super class">,
InGroup<ObjCDesignatedInit>;
def note_objc_designated_init_marked_here : Note<
"method marked as designated initializer of the class here">;
def warn_objc_designated_init_non_super_designated_init_call : Warning<
"designated initializer should only invoke a designated initializer on 'super'">,
InGroup<ObjCDesignatedInit>;
def warn_objc_designated_init_non_designated_init_call : Warning<
"designated initializer invoked a non-designated initializer">,
InGroup<ObjCDesignatedInit>;
def warn_objc_secondary_init_super_init_call : Warning<
"convenience initializer should not invoke an initializer on 'super'">,
InGroup<ObjCDesignatedInit>;
def warn_objc_secondary_init_missing_init_call : Warning<
"convenience initializer missing a 'self' call to another initializer">,
InGroup<ObjCDesignatedInit>;
def warn_objc_implementation_missing_designated_init_override : Warning<
"method override for the designated initializer of the superclass %objcinstance0 not found">,
InGroup<ObjCDesignatedInit>;
def err_designated_init_attr_non_init : Error<
"'objc_designated_initializer' attribute only applies to init methods "
"of interface or class extension declarations">;
// objc_bridge attribute diagnostics.
def err_objc_attr_not_id : Error<
"parameter of %0 attribute must be a single name of an Objective-C %select{class|protocol}1">;
def err_objc_attr_typedef_not_id : Error<
"parameter of %0 attribute must be 'id' when used on a typedef">;
def err_objc_attr_typedef_not_void_pointer : Error<
"'objc_bridge(id)' is only allowed on structs and typedefs of void pointers">;
def err_objc_cf_bridged_not_interface : Error<
"CF object of type %0 is bridged to %1, which is not an Objective-C class">;
def err_objc_ns_bridged_invalid_cfobject : Error<
"ObjectiveC object of type %0 is bridged to %1, which is not valid CF object">;
def warn_objc_invalid_bridge : Warning<
"%0 bridges to %1, not %2">, InGroup<ObjCBridge>;
def warn_objc_invalid_bridge_to_cf : Warning<
"%0 cannot bridge to %1">, InGroup<ObjCBridge>;
// objc_bridge_related attribute diagnostics.
def err_objc_bridged_related_invalid_class : Error<
"could not find Objective-C class %0 to convert %1 to %2">;
def err_objc_bridged_related_invalid_class_name : Error<
"%0 must be name of an Objective-C class to be able to convert %1 to %2">;
def err_objc_bridged_related_known_method : Error<
"%0 must be explicitly converted to %1; use %select{%objcclass2|%objcinstance2}3 "
"method for this conversion">;
def err_objc_attr_protocol_requires_definition : Error<
"attribute %0 can only be applied to @protocol definitions, not forward declarations">;
// Swift attributes.
def warn_attr_swift_name_function
: Warning<"%0 attribute argument must be a string literal specifying a Swift function name">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_invalid_identifier
: Warning<"%0 attribute has invalid identifier for the %select{base|context|parameter}1 name">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_decl_kind
: Warning<"%0 attribute cannot be applied to this declaration">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_subscript_invalid_parameter
: Warning<"%0 attribute for 'subscript' must %select{be a getter or setter|"
"have at least one parameter|"
"have a 'self:' parameter}1">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_missing_parameters
: Warning<"%0 attribute is missing parameter label clause">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_setter_parameters
: Warning<"%0 attribute for setter must have one parameter for new value">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_multiple_selfs
: Warning<"%0 attribute cannot specify more than one 'self:' parameter">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_getter_parameters
: Warning<"%0 attribute for getter must not have any parameters besides 'self:'">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_subscript_setter_no_newValue
: Warning<"%0 attribute for 'subscript' setter must have a 'newValue:' parameter">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_subscript_setter_multiple_newValues
: Warning<"%0 attribute for 'subscript' setter cannot have multiple 'newValue:' parameters">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_subscript_getter_newValue
: Warning<"%0 attribute for 'subscript' getter cannot have a 'newValue:' parameter">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_num_params
: Warning<"too %select{few|many}0 parameters in the signature specified by "
"the %1 attribute (expected %2; got %3)">,
InGroup<SwiftNameAttribute>;
def warn_attr_swift_name_decl_missing_params
: Warning<"%0 attribute cannot be applied to a %select{function|method}1 "
"with no parameters">,
InGroup<SwiftNameAttribute>;
def err_attr_swift_error_no_error_parameter : Error<
"%0 attribute can only be applied to a %select{function|method}1 with an "
"error parameter">;
def err_attr_swift_error_return_type : Error<
"%0 attribute with '%1' convention can only be applied to a "
"%select{function|method}2 returning %select{an integral type|a pointer}3">;
def err_swift_async_no_access : Error<
"first argument to 'swift_async' must be either 'none', 'swift_private', or "
"'not_swift_private'">;
def err_swift_async_bad_block_type : Error<
"'swift_async' completion handler parameter must have block type returning"
" 'void', type here is %0">;
def err_swift_async_error_without_swift_async : Error<
"%0 attribute must be applied to a %select{function|method}1 annotated "
"with non-'none' attribute 'swift_async'">;
def err_swift_async_error_no_error_parameter : Error<
"%0 attribute with 'nonnull_error' convention can only be applied to a "
"%select{function|method}1 with a completion handler with an error "
"parameter">;
def err_swift_async_error_non_integral : Error<
"%0 attribute with '%1' convention must have an integral-typed parameter "
"in completion handler at index %2, type here is %3">;
def warn_ignored_objc_externally_retained : Warning<
"'objc_externally_retained' can only be applied to local variables "
"%select{of retainable type|with strong ownership}0">,
InGroup<IgnoredAttributes>;
// Function Parameter Semantic Analysis.
def err_param_with_void_type : Error<"argument may not have 'void' type">;
def err_void_only_param : Error<
"'void' must be the first and only parameter if specified">;
def err_void_param_qualified : Error<
"'void' as parameter must not have type qualifiers">;
def err_ident_list_in_fn_declaration : Error<
"a parameter list without types is only allowed in a function definition">;
def ext_param_not_declared : ExtWarn<
"parameter %0 was not declared, defaults to 'int'; ISO C99 and later do not "
"support implicit int">, InGroup<ImplicitInt>;
def err_param_default_argument : Error<
"C does not support default arguments">;
def err_param_default_argument_redefinition : Error<
"redefinition of default argument">;
def ext_param_default_argument_redefinition : ExtWarn<
err_param_default_argument_redefinition.Summary>,
InGroup<MicrosoftDefaultArgRedefinition>;
def err_param_default_argument_missing : Error<
"missing default argument on parameter">;
def err_param_default_argument_missing_name : Error<
"missing default argument on parameter %0">;
def err_param_default_argument_references_param : Error<
"default argument references parameter %0">;
def err_param_default_argument_references_local : Error<
"default argument references local variable %0 of enclosing function">;
def err_param_default_argument_references_this : Error<
"default argument references 'this'">;
def err_param_default_argument_nonfunc : Error<
"default arguments can only be specified for parameters in a function "
"declaration">;
def err_param_default_argument_template_redecl : Error<
"default arguments cannot be added to a function template that has already "
"been declared">;
def err_param_default_argument_member_template_redecl : Error<
"default arguments cannot be added to an out-of-line definition of a member "
"of a %select{class template|class template partial specialization|nested "
"class in a template}0">;
def err_param_default_argument_on_parameter_pack : Error<
"parameter pack cannot have a default argument">;
def err_uninitialized_member_for_assign : Error<
"cannot define the implicit copy assignment operator for %0, because "
"non-static %select{reference|const}1 member %2 cannot use copy "
"assignment operator">;
def err_uninitialized_member_in_ctor : Error<
"%select{constructor for %1|"
"implicit default constructor for %1|"
"cannot use constructor inherited from %1:}0 must explicitly "
"initialize the %select{reference|const}2 member %3">;
def err_default_arg_makes_ctor_special : Error<
"addition of default argument on redeclaration makes this constructor a "
"%select{default|copy|move}0 constructor">;
def err_stmt_expr_in_default_arg : Error<
"default %select{argument|non-type template argument}0 may not use a GNU "
"statement expression">;
def err_use_of_default_argument_to_function_declared_later : Error<
"use of default argument to function %0 that is declared later in class %1">;
def note_default_argument_declared_here : Note<
"default argument declared here">;
def err_recursive_default_argument : Error<"recursive evaluation of default argument">;
def note_recursive_default_argument_used_here : Note<
"default argument used here">;
def ext_param_promoted_not_compatible_with_prototype : ExtWarn<
"%diff{promoted type $ of K&R function parameter is not compatible with the "
"parameter type $|promoted type of K&R function parameter is not compatible "
"with parameter type}0,1 declared in a previous prototype">,
InGroup<KNRPromotedParameter>;
// C++ Overloading Semantic Analysis.
def err_ovl_diff_return_type : Error<
"functions that differ only in their return type cannot be overloaded">;
def err_ovl_static_nonstatic_member : Error<
"static and non-static member functions with the same parameter types "
"cannot be overloaded">;
let Deferrable = 1 in {
def err_ovl_no_viable_function_in_call : Error<
"no matching function for call to %0">;
def err_ovl_no_viable_member_function_in_call : Error<
"no matching member function for call to %0">;
def err_ovl_ambiguous_call : Error<
"call to %0 is ambiguous">;
def err_ovl_deleted_call : Error<"call to deleted function %0">;
def err_ovl_ambiguous_member_call : Error<
"call to member function %0 is ambiguous">;
def err_ovl_deleted_member_call : Error<
"call to deleted member function %0">;
def note_ovl_too_many_candidates : Note<
"remaining %0 candidate%s0 omitted; "
"pass -fshow-overloads=all to show them">;
def select_ovl_candidate_kind : TextSubstitution<
"%select{function|function|function (with reversed parameter order)|"
"constructor|"
"constructor (the implicit default constructor)|"
"constructor (the implicit copy constructor)|"
"constructor (the implicit move constructor)|"
"function (the implicit copy assignment operator)|"
"function (the implicit move assignment operator)|"
"function (the implicit 'operator==' for this 'operator<=>)'|"
"inherited constructor}0%select{| template| %2}1">;
def note_ovl_candidate : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,3"
"%select{| has different class%diff{ (expected $ but has $)|}5,6"
"| has different number of parameters (expected %5 but has %6)"
"| has type mismatch at %ordinal5 parameter"
"%diff{ (expected $ but has $)|}6,7"
"| has different return type%diff{ ($ expected but has $)|}5,6"
"| has different qualifiers (expected %5 but found %6)"
"| has different exception specification}4">;
def note_ovl_candidate_explicit : Note<
"explicit %select{constructor|conversion function|deduction guide}0 "
"is not a candidate%select{| (explicit specifier evaluates to true)}1">;
def note_ovl_candidate_inherited_constructor : Note<
"constructor from base class %0 inherited here">;
def note_ovl_candidate_inherited_constructor_slice : Note<
"candidate %select{constructor|template}0 ignored: "
"inherited constructor cannot be used to %select{copy|move}1 object">;
def note_ovl_candidate_illegal_constructor : Note<
"candidate %select{constructor|template}0 ignored: "
"instantiation %select{takes|would take}0 its own class type by value">;
def note_ovl_candidate_illegal_constructor_adrspace_mismatch : Note<
"candidate constructor ignored: cannot be used to construct an object "
"in address space %0">;
def note_ovl_candidate_bad_deduction : Note<
"candidate template ignored: failed template argument deduction">;
def note_ovl_candidate_incomplete_deduction : Note<"candidate template ignored: "
"couldn't infer template argument %0">;
def note_ovl_candidate_incomplete_deduction_pack : Note<
"candidate template ignored: "
"deduced too few arguments for expanded pack %0; no argument for %ordinal1 "
"expanded parameter in deduced argument pack %2">;
def note_ovl_candidate_inconsistent_deduction : Note<
"candidate template ignored: deduced %select{conflicting types|"
"conflicting values|conflicting templates|packs of different lengths}0 "
"for parameter %1%diff{ ($ vs. $)|}2,3">;
def note_ovl_candidate_inconsistent_deduction_types : Note<
"candidate template ignored: deduced values %diff{"
"of conflicting types for parameter %0 (%1 of type $ vs. %3 of type $)|"
"%1 and %3 of conflicting types for parameter %0}2,4">;
def note_ovl_candidate_explicit_arg_mismatch_named : Note<
"candidate template ignored: invalid explicitly-specified argument "
"for template parameter %0">;
def note_ovl_candidate_unsatisfied_constraints : Note<
"candidate template ignored: constraints not satisfied%0">;
def note_ovl_candidate_explicit_arg_mismatch_unnamed : Note<
"candidate template ignored: invalid explicitly-specified argument "
"for %ordinal0 template parameter">;
def note_ovl_candidate_instantiation_depth : Note<
"candidate template ignored: substitution exceeded maximum template "
"instantiation depth">;
def note_ovl_candidate_underqualified : Note<
"candidate template ignored: cannot deduce a type for %0 that would "
"make %2 equal %1">;
def note_ovl_candidate_substitution_failure : Note<
"candidate template ignored: substitution failure%0%1">;
def note_ovl_candidate_disabled_by_enable_if : Note<
"candidate template ignored: disabled by %0%1">;
def note_ovl_candidate_disabled_by_requirement : Note<
"candidate template ignored: requirement '%0' was not satisfied%1">;
def note_ovl_candidate_has_pass_object_size_params: Note<
"candidate address cannot be taken because parameter %0 has "
"pass_object_size attribute">;
def err_diagnose_if_succeeded : Error<"%0">;
def warn_diagnose_if_succeeded : Warning<"%0">, InGroup<UserDefinedWarnings>,
ShowInSystemHeader;
def note_ovl_candidate_disabled_by_function_cond_attr : Note<
"candidate disabled: %0">;
def err_addrof_function_disabled_by_enable_if_attr : Error<
"cannot take address of function %0 because it has one or more "
"non-tautological enable_if conditions">;
def err_addrof_function_constraints_not_satisfied : Error<
"cannot take address of function %0 because its constraints are not "
"satisfied">;
def note_addrof_ovl_candidate_disabled_by_enable_if_attr : Note<
"candidate function made ineligible by enable_if">;
def note_ovl_candidate_deduced_mismatch : Note<
"candidate template ignored: deduced type "
"%diff{$ of %select{|element of }4%ordinal0 parameter does not match "
"adjusted type $ of %select{|element of }4argument"
"|of %select{|element of }4%ordinal0 parameter does not match "
"adjusted type of %select{|element of }4argument}1,2%3">;
def note_ovl_candidate_non_deduced_mismatch : Note<
"candidate template ignored: could not match %diff{$ against $|types}0,1">;
// This note is needed because the above note would sometimes print two
// different types with the same name. Remove this note when the above note
// can handle that case properly.
def note_ovl_candidate_non_deduced_mismatch_qualified : Note<
"candidate template ignored: could not match %q0 against %q1">;
// Note that we don't treat templates differently for this diagnostic.
def note_ovl_candidate_arity : Note<"candidate "
"%sub{select_ovl_candidate_kind}0,1,2 not viable: "
"requires%select{ at least| at most|}3 %4 argument%s4, but %5 "
"%plural{1:was|:were}5 provided">;
def note_ovl_candidate_arity_one : Note<"candidate "
"%sub{select_ovl_candidate_kind}0,1,2 not viable: "
"%select{requires at least|allows at most single|requires single}3 "
"argument %4, but %plural{0:no|:%5}5 arguments were provided">;
def note_ovl_candidate_deleted : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 has been "
"%select{explicitly made unavailable|explicitly deleted|"
"implicitly deleted}3">;
// Giving the index of the bad argument really clutters this message, and
// it's relatively unimportant because 1) it's generally obvious which
// argument(s) are of the given object type and 2) the fix is usually
// to complete the type, which doesn't involve changes to the call line
// anyway. If people complain, we can change it.
def note_ovl_candidate_bad_conv_incomplete : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"cannot convert argument of incomplete type "
"%diff{$ to $|to parameter type}3,4 for "
"%select{%ordinal6 argument|object argument}5"
"%select{|; dereference the argument with *|"
"; take the address of the argument with &|"
"; remove *|"
"; remove &}7">;
def note_ovl_candidate_bad_list_argument : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"%select{cannot convert initializer list|too few initializers in list"
"|too many initializers in list}7 argument to %4">;
def note_ovl_candidate_bad_overload : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"no overload of %4 matching %3 for %ordinal5 argument">;
def note_ovl_candidate_bad_conv : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"no known conversion "
"%diff{from $ to $|from argument type to parameter type}3,4 for "
"%select{%ordinal6 argument|object argument}5"
"%select{|; dereference the argument with *|"
"; take the address of the argument with &|"
"; remove *|"
"; remove &}7">;
def note_ovl_candidate_bad_arc_conv : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"cannot implicitly convert argument "
"%diff{of type $ to $|type to parameter type}3,4 for "
"%select{%ordinal6 argument|object argument}5 under ARC">;
def note_ovl_candidate_bad_value_category : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"expects an %select{lvalue|rvalue}5 for "
"%select{%ordinal4 argument|object argument}3">;
def note_ovl_candidate_bad_addrspace : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"cannot %select{pass pointer to|bind reference in}5 %3 "
"%select{as a pointer to|to object in}5 %4 in %ordinal6 "
"argument">;
def note_ovl_candidate_bad_addrspace_this : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"'this' object is in %3, but method expects object in %4">;
def note_ovl_candidate_bad_gc : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"%select{%ordinal7|'this'}6 argument (%3) has %select{no|__weak|__strong}4 "
"ownership, but parameter has %select{no|__weak|__strong}5 ownership">;
def note_ovl_candidate_bad_ownership : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"%select{%ordinal7|'this'}6 argument (%3) has "
"%select{no|__unsafe_unretained|__strong|__weak|__autoreleasing}4 ownership,"
" but parameter has %select{no|__unsafe_unretained|__strong|__weak|"
"__autoreleasing}5 ownership">;
def note_ovl_candidate_bad_cvr_this : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"'this' argument has type %3, but method is not marked "
"%select{const|restrict|const or restrict|volatile|const or volatile|"
"volatile or restrict|const, volatile, or restrict}4">;
def note_ovl_candidate_bad_cvr : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"%ordinal5 argument (%3) would lose "
"%select{const|restrict|const and restrict|volatile|const and volatile|"
"volatile and restrict|const, volatile, and restrict}4 qualifier"
"%select{||s||s|s|s}4">;
def note_ovl_candidate_bad_unaligned : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"%ordinal5 argument (%3) would lose __unaligned qualifier">;
def note_ovl_candidate_bad_base_to_derived_conv : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"cannot %select{convert from|convert from|bind}3 "
"%select{base class pointer|superclass|base class object of type}3 %4 to "
"%select{derived class pointer|subclass|derived class reference}3 %5 for "
"%ordinal6 argument">;
def note_ovl_candidate_bad_target : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"call to "
"%select{__device__|__global__|__host__|__host__ __device__|invalid}3 function from"
" %select{__device__|__global__|__host__|__host__ __device__|invalid}4 function">;
def note_ovl_candidate_constraints_not_satisfied : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: constraints "
"not satisfied">;
def note_implicit_member_target_infer_collision : Note<
"implicit %sub{select_special_member_kind}0 inferred target collision: call to both "
"%select{__device__|__global__|__host__|__host__ __device__}1 and "
"%select{__device__|__global__|__host__|__host__ __device__}2 members">;
def note_ambiguous_type_conversion: Note<
"because of ambiguity in conversion %diff{of $ to $|between types}0,1">;
def note_ovl_builtin_candidate : Note<"built-in candidate %0">;
def err_ovl_no_viable_function_in_init : Error<
"no matching constructor for initialization of %0">;
def err_ovl_no_conversion_in_cast : Error<
"cannot convert %1 to %2 without a conversion operator">;
def err_ovl_no_viable_conversion_in_cast : Error<
"no matching conversion for %select{|static_cast|reinterpret_cast|"
"dynamic_cast|C-style cast|functional-style cast|}0 from %1 to %2">;
def err_ovl_ambiguous_conversion_in_cast : Error<
"ambiguous conversion for %select{|static_cast|reinterpret_cast|"
"dynamic_cast|C-style cast|functional-style cast|}0 from %1 to %2">;
def err_ovl_deleted_conversion_in_cast : Error<
"%select{|static_cast|reinterpret_cast|dynamic_cast|C-style cast|"
"functional-style cast|}0 from %1 to %2 uses deleted function">;
def err_ovl_ambiguous_init : Error<"call to constructor of %0 is ambiguous">;
def err_ref_init_ambiguous : Error<
"reference initialization of type %0 with initializer of type %1 is ambiguous">;
def err_ovl_deleted_init : Error<
"call to deleted constructor of %0">;
def err_ovl_deleted_special_init : Error<
"call to implicitly-deleted %select{default constructor|copy constructor|"
"move constructor|copy assignment operator|move assignment operator|"
"destructor|function}0 of %1">;
def err_ovl_ambiguous_oper_unary : Error<
"use of overloaded operator '%0' is ambiguous (operand type %1)">;
def err_ovl_ambiguous_oper_binary : Error<
"use of overloaded operator '%0' is ambiguous (with operand types %1 and %2)">;
def ext_ovl_ambiguous_oper_binary_reversed : ExtWarn<
"ISO C++20 considers use of overloaded operator '%0' (with operand types %1 "
"and %2) to be ambiguous despite there being a unique best viable function"
"%select{ with non-reversed arguments|}3">,
InGroup<DiagGroup<"ambiguous-reversed-operator">>, SFINAEFailure;
def note_ovl_ambiguous_oper_binary_reversed_self : Note<
"ambiguity is between a regular call to this operator and a call with the "
"argument order reversed">;
def note_ovl_ambiguous_eqeq_reversed_self_non_const : Note<
"mark 'operator==' as const or add a matching 'operator!=' to resolve the ambiguity">;
def note_ovl_ambiguous_oper_binary_selected_candidate : Note<
"candidate function with non-reversed arguments">;
def note_ovl_ambiguous_oper_binary_reversed_candidate : Note<
"ambiguous candidate function with reversed arguments">;
def err_ovl_no_viable_oper : Error<"no viable overloaded '%0'">;
def note_assign_lhs_incomplete : Note<"type %0 is incomplete">;
def err_ovl_deleted_oper : Error<
"overload resolution selected deleted operator '%0'">;
def err_ovl_deleted_special_oper : Error<
"object of type %0 cannot be %select{constructed|copied|moved|assigned|"
"assigned|destroyed}1 because its %sub{select_special_member_kind}1 is "
"implicitly deleted">;
def err_ovl_deleted_comparison : Error<
"object of type %0 cannot be compared because its %1 is implicitly deleted">;
def err_ovl_rewrite_equalequal_not_bool : Error<
"return type %0 of selected 'operator==' function for rewritten "
"'%1' comparison is not 'bool'">;
def ext_ovl_rewrite_equalequal_not_bool : ExtWarn<
"ISO C++20 requires return type of selected 'operator==' function for "
"rewritten '%1' comparison to be 'bool', not %0">,
InGroup<DiagGroup<"rewrite-not-bool">>, SFINAEFailure;
def err_ovl_no_viable_subscript :
Error<"no viable overloaded operator[] for type %0">;
def err_ovl_no_oper :
Error<"type %0 does not provide a %select{subscript|call}1 operator">;
def err_ovl_unresolvable : Error<
"reference to %select{overloaded|multiversioned}1 function could not be "
"resolved; did you mean to call it%select{| with no arguments}0?">;
def err_bound_member_function : Error<
"reference to non-static member function must be called"
"%select{|; did you mean to call it with no arguments?}0">;
def note_possible_target_of_call : Note<"possible target for call">;
def err_no_viable_destructor : Error<
"no viable destructor found for class %0">;
def err_ambiguous_destructor : Error<
"destructor of class %0 is ambiguous">;
def err_ovl_no_viable_object_call : Error<
"no matching function for call to object of type %0">;
def err_ovl_ambiguous_object_call : Error<
"call to object of type %0 is ambiguous">;
def err_ovl_ambiguous_subscript_call : Error<
"call to subscript operator of type %0 is ambiguous">;
def err_ovl_deleted_object_call : Error<
"call to deleted function call operator in type %0">;
def note_ovl_surrogate_cand : Note<"conversion candidate of type %0">;
def err_member_call_without_object : Error<
"call to non-static member function without an object argument">;
// C++ Address of Overloaded Function
def err_addr_ovl_no_viable : Error<
"address of overloaded function %0 does not match required type %1">;
def err_addr_ovl_ambiguous : Error<
"address of overloaded function %0 is ambiguous">;
def err_addr_ovl_not_func_ptrref : Error<
"address of overloaded function %0 cannot be converted to type %1">;
def err_addr_ovl_no_qualifier : Error<
"cannot form member pointer of type %0 without '&' and class name">;
} // let Deferrable
// C++11 Literal Operators
def err_ovl_no_viable_literal_operator : Error<
"no matching literal operator for call to %0"
"%select{| with argument of type %2| with arguments of types %2 and %3}1"
"%select{| or 'const char *'}4"
"%select{|, and no matching literal operator template}5">;
// C++ Template Declarations
def err_template_param_shadow : Error<
"declaration of %0 shadows template parameter">;
def ext_template_param_shadow : ExtWarn<
err_template_param_shadow.Summary>, InGroup<MicrosoftTemplateShadow>;
def note_template_param_here : Note<"template parameter is declared here">;
def warn_template_export_unsupported : Warning<
"exported templates are unsupported">;
def err_template_outside_namespace_or_class_scope : Error<
"templates can only be declared in namespace or class scope">;
def err_template_inside_local_class : Error<
"templates cannot be declared inside of a local class">;
def err_template_linkage : Error<"templates must have C++ linkage">;
def err_template_typedef : Error<"a typedef cannot be a template">;
def err_template_unnamed_class : Error<
"cannot declare a class template with no name">;
def err_template_param_list_different_arity : Error<
"%select{too few|too many}0 template parameters in template "
"%select{|template parameter }1redeclaration">;
def note_template_param_list_different_arity : Note<
"%select{too few|too many}0 template parameters in template template "
"argument">;
def note_template_prev_declaration : Note<
"previous template %select{declaration|template parameter}0 is here">;
def err_template_param_different_kind : Error<
"template parameter has a different kind in template "
"%select{|template parameter }0redeclaration">;
def note_template_param_different_kind : Note<
"template parameter has a different kind in template argument">;
def err_invalid_decl_specifier_in_nontype_parm : Error<
"invalid declaration specifier in template non-type parameter">;
def err_template_nontype_parm_different_type : Error<
"template non-type parameter has a different type %0 in template "
"%select{|template parameter }1redeclaration">;
def note_template_nontype_parm_different_type : Note<
"template non-type parameter has a different type %0 in template argument">;
def note_template_nontype_parm_prev_declaration : Note<
"previous non-type template parameter with type %0 is here">;
def err_template_nontype_parm_bad_type : Error<
"a non-type template parameter cannot have type %0">;
def err_template_nontype_parm_bad_structural_type : Error<
"a non-type template parameter cannot have type %0 before C++20">;
def err_template_nontype_parm_incomplete : Error<
"non-type template parameter has incomplete type %0">;
def err_template_nontype_parm_not_literal : Error<
"non-type template parameter has non-literal type %0">;
def err_template_nontype_parm_rvalue_ref : Error<
"non-type template parameter has rvalue reference type %0">;
def err_template_nontype_parm_not_structural : Error<
"type %0 of non-type template parameter is not a structural type">;
def note_not_structural_non_public : Note<
"%0 is not a structural type because it has a "
"%select{non-static data member|base class}1 that is not public">;
def note_not_structural_mutable_field : Note<
"%0 is not a structural type because it has a mutable "
"non-static data member">;
def note_not_structural_rvalue_ref_field : Note<
"%0 is not a structural type because it has a non-static data member "
"of rvalue reference type">;
def note_not_structural_subobject : Note<
"%0 is not a structural type because it has a "
"%select{non-static data member|base class}1 of non-structural type %2">;
def warn_cxx17_compat_template_nontype_parm_type : Warning<
"non-type template parameter of type %0 is incompatible with "
"C++ standards before C++20">,
DefaultIgnore, InGroup<CXXPre20Compat>;
def warn_cxx14_compat_template_nontype_parm_auto_type : Warning<
"non-type template parameters declared with %0 are incompatible with C++ "
"standards before C++17">,
DefaultIgnore, InGroup<CXXPre17Compat>;
def err_template_param_default_arg_redefinition : Error<
"template parameter redefines default argument">;
def err_template_param_default_arg_inconsistent_redefinition : Error<
"template parameter default argument is inconsistent with previous definition">;
def note_template_param_prev_default_arg : Note<
"previous default template argument defined here">;
def note_template_param_prev_default_arg_in_other_module : Note<
"previous default template argument defined in module %0">;
def err_template_param_default_arg_missing : Error<
"template parameter missing a default argument">;
def ext_template_parameter_default_in_function_template : ExtWarn<
"default template arguments for a function template are a C++11 extension">,
InGroup<CXX11>;
def warn_cxx98_compat_template_parameter_default_in_function_template : Warning<
"default template arguments for a function template are incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_template_parameter_default_template_member : Error<
"cannot add a default template argument to the definition of a member of a "
"class template">;
def err_template_parameter_default_friend_template : Error<
"default template argument not permitted on a friend template">;
def err_template_template_parm_no_parms : Error<
"template template parameter must have its own template parameters">;
def ext_variable_template : ExtWarn<"variable templates are a C++14 extension">,
InGroup<CXX14>;
def warn_cxx11_compat_variable_template : Warning<
"variable templates are incompatible with C++ standards before C++14">,
InGroup<CXXPre14Compat>, DefaultIgnore;
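// Illustrative sketch (editorial): a C++14 variable template.
//   template <typename T> constexpr T pi = T(3.141592653589793L);
//   double d = pi<double>;  // pre-C++14 modes warn that variable templates are an extension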
def err_template_variable_noparams : Error<
"extraneous 'template<>' in declaration of variable %0">;
def err_template_member : Error<"member %0 declared as a template">;
def err_member_with_template_arguments : Error<"member %0 cannot have template arguments">;
def err_template_member_noparams : Error<
"extraneous 'template<>' in declaration of member %0">;
def err_template_tag_noparams : Error<
"extraneous 'template<>' in declaration of %0 %1">;
def warn_cxx17_compat_adl_only_template_id : Warning<
"use of function template name with no prior function template "
"declaration in function call with explicit template arguments "
"is incompatible with C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_adl_only_template_id : ExtWarn<
"use of function template name with no prior declaration in function call "
"with explicit template arguments is a C++20 extension">, InGroup<CXX20>;
def warn_unqualified_call_to_std_cast_function : Warning<
"unqualified call to '%0'">, InGroup<DiagGroup<"unqualified-std-cast-call">>;
// C++ Template Argument Lists
def err_template_missing_args : Error<
"use of "
"%select{class template|function template|variable template|alias template|"
"template template parameter|concept|template}0 %1 requires template "
"arguments">;
def err_template_arg_list_different_arity : Error<
"%select{too few|too many}0 template arguments for "
"%select{class template|function template|variable template|alias template|"
"template template parameter|concept|template}1 %2">;
def note_template_decl_here : Note<"template is declared here">;
def err_template_arg_must_be_type : Error<
"template argument for template type parameter must be a type">;
def err_template_arg_must_be_type_suggest : Error<
"template argument for template type parameter must be a type; "
"did you forget 'typename'?">;
def ext_ms_template_type_arg_missing_typename : ExtWarn<
"template argument for template type parameter must be a type; "
"omitted 'typename' is a Microsoft extension">,
InGroup<MicrosoftTemplate>;
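// Illustrative sketch (editorial): a dependent type argument missing 'typename'.
//   template <typename T> struct Wrap {};
//   template <typename T> void f() {
//     Wrap<typename T::type> ok;  // dependent type argument spelled correctly
//     Wrap<T::type> bad;          // error: did you forget 'typename'? (accepted as a
//   }                             // Microsoft extension under -fms-compatibility)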
def err_template_arg_must_be_expr : Error<
"template argument for non-type template parameter must be an expression">;
def err_template_arg_nontype_ambig : Error<
"template argument for non-type template parameter is treated as function type %0">;
def err_template_arg_must_be_template : Error<
"template argument for template template parameter must be a class template%select{| or type alias template}0">;
def ext_template_arg_local_type : ExtWarn<
"template argument uses local type %0">, InGroup<LocalTypeTemplateArgs>;
def ext_template_arg_unnamed_type : ExtWarn<
"template argument uses unnamed type">, InGroup<UnnamedTypeTemplateArgs>;
def warn_cxx98_compat_template_arg_local_type : Warning<
"local type %0 as template argument is incompatible with C++98">,
InGroup<CXX98CompatLocalTypeTemplateArgs>, DefaultIgnore;
def warn_cxx98_compat_template_arg_unnamed_type : Warning<
"unnamed type as template argument is incompatible with C++98">,
InGroup<CXX98CompatUnnamedTypeTemplateArgs>, DefaultIgnore;
def note_template_unnamed_type_here : Note<
"unnamed type used in template argument was declared here">;
def err_template_arg_overload_type : Error<
"template argument is the type of an unresolved overloaded function">;
def err_template_arg_not_valid_template : Error<
"template argument does not refer to a class or alias template, or template "
"template parameter">;
def note_template_arg_refers_here_func : Note<
"template argument refers to function template %0, here">;
def err_template_arg_template_params_mismatch : Error<
"template template argument has different template parameters than its "
"corresponding template template parameter">;
def err_template_arg_not_integral_or_enumeral : Error<
"non-type template argument of type %0 must have an integral or enumeration"
" type">;
def err_template_arg_not_ice : Error<
"non-type template argument of type %0 is not an integral constant "
"expression">;
def err_template_arg_not_address_constant : Error<
"non-type template argument of type %0 is not a constant expression">;
def warn_cxx98_compat_template_arg_null : Warning<
"use of null pointer as non-type template argument is incompatible with "
"C++98">, InGroup<CXX98Compat>, DefaultIgnore;
def err_template_arg_untyped_null_constant : Error<
"null non-type template argument must be cast to template parameter type %0">;
def err_template_arg_wrongtype_null_constant : Error<
"null non-type template argument of type %0 does not match template parameter "
"of type %1">;
def err_non_type_template_parm_type_deduction_failure : Error<
"non-type template parameter %0 with type %1 has incompatible initializer of type %2">;
def err_deduced_non_type_template_arg_type_mismatch : Error<
"deduced non-type template argument does not have the same type as the "
"corresponding template parameter%diff{ ($ vs $)|}0,1">;
def err_non_type_template_arg_subobject : Error<
"non-type template argument refers to subobject '%0'">;
def err_non_type_template_arg_addr_label_diff : Error<
"template argument / label address difference / what did you expect?">;
def err_non_type_template_arg_unsupported : Error<
"sorry, non-type template argument of type %0 is not yet supported">;
def err_template_arg_not_convertible : Error<
"non-type template argument of type %0 cannot be converted to a value "
"of type %1">;
def warn_template_arg_negative : Warning<
"non-type template argument with value '%0' converted to '%1' for unsigned "
"template parameter of type %2">, InGroup<Conversion>, DefaultIgnore;
def warn_template_arg_too_large : Warning<
"non-type template argument value '%0' truncated to '%1' for "
"template parameter of type %2">, InGroup<Conversion>, DefaultIgnore;
def err_template_arg_no_ref_bind : Error<
"non-type template parameter of reference type "
"%diff{$ cannot bind to template argument of type $"
"|cannot bind to template of incompatible argument type}0,1">;
def err_template_arg_ref_bind_ignores_quals : Error<
"reference binding of non-type template parameter "
"%diff{of type $ to template argument of type $|to template argument}0,1 "
"ignores qualifiers">;
def err_template_arg_not_decl_ref : Error<
"non-type template argument does not refer to any declaration">;
def err_template_arg_not_address_of : Error<
"non-type template argument for template parameter of pointer type %0 must "
"have its address taken">;
def err_template_arg_address_of_non_pointer : Error<
"address taken in non-type template argument for template parameter of "
"reference type %0">;
def err_template_arg_reference_var : Error<
"non-type template argument of reference type %0 is not an object">;
def err_template_arg_field : Error<
"non-type template argument refers to non-static data member %0">;
def err_template_arg_method : Error<
"non-type template argument refers to non-static member function %0">;
def err_template_arg_object_no_linkage : Error<
"non-type template argument refers to %select{function|object}0 %1 that "
"does not have linkage">;
def warn_cxx98_compat_template_arg_object_internal : Warning<
"non-type template argument referring to %select{function|object}0 %1 with "
"internal linkage is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def ext_template_arg_object_internal : ExtWarn<
"non-type template argument referring to %select{function|object}0 %1 with "
"internal linkage is a C++11 extension">, InGroup<CXX11>;
def err_template_arg_thread_local : Error<
"non-type template argument refers to thread-local object">;
def note_template_arg_internal_object : Note<
"non-type template argument refers to %select{function|object}0 here">;
def note_template_arg_refers_here : Note<
"non-type template argument refers here">;
def err_template_arg_not_object_or_func : Error<
"non-type template argument does not refer to an object or function">;
def err_template_arg_not_pointer_to_member_form : Error<
"non-type template argument is not a pointer to member constant">;
def err_template_arg_member_ptr_base_derived_not_supported : Error<
"sorry, non-type template argument of pointer-to-member type %1 that refers "
"to member %q0 of a different class is not supported yet">;
def err_template_arg_invalid : Error<
"non-type template argument '%0' is invalid">;
def ext_template_arg_extra_parens : ExtWarn<
"address non-type template argument cannot be surrounded by parentheses">;
def warn_cxx98_compat_template_arg_extra_parens : Warning<
"redundant parentheses surrounding address non-type template argument are "
"incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
def err_pointer_to_member_type : Error<
"invalid use of pointer to member type after %select{.*|->*}0">;
def err_pointer_to_member_call_drops_quals : Error<
"call to pointer to member function of type %0 drops '%1' qualifier%s2">;
def err_pointer_to_member_oper_value_classify: Error<
"pointer-to-member function type %0 can only be called on an "
"%select{rvalue|lvalue}1">;
def ext_pointer_to_const_ref_member_on_rvalue : Extension<
"invoking a pointer to a 'const &' member function on an rvalue is a C++20 extension">,
InGroup<CXX20>, SFINAEFailure;
def warn_cxx17_compat_pointer_to_const_ref_member_on_rvalue : Warning<
"invoking a pointer to a 'const &' member function on an rvalue is "
"incompatible with C++ standards before C++20">,
InGroup<CXXPre20CompatPedantic>, DefaultIgnore;
def ext_ms_deref_template_argument: ExtWarn<
"non-type template argument containing a dereference operation is a "
"Microsoft extension">, InGroup<MicrosoftTemplate>;
def ext_ms_delayed_template_argument: ExtWarn<
"using the undeclared type %0 as a default template argument is a "
"Microsoft extension">, InGroup<MicrosoftTemplate>;
def err_template_arg_deduced_incomplete_pack : Error<
"deduced incomplete pack %0 for template parameter %1">;
// C++ template specialization
def err_template_spec_unknown_kind : Error<
"can only provide an explicit specialization for a class template, function "
"template, variable template, or a member function, static data member, "
"%select{or member class|member class, or member enumeration}0 of a "
"class template">;
def note_specialized_entity : Note<
"explicitly specialized declaration is here">;
def err_template_spec_decl_function_scope : Error<
"explicit specialization of %0 in function scope">;
def err_template_spec_decl_friend : Error<
"cannot declare an explicit specialization in a friend">;
def err_template_spec_redecl_out_of_scope : Error<
"%select{class template|class template partial|variable template|"
"variable template partial|function template|member "
"function|static data member|member class|member enumeration}0 "
"specialization of %1 not in %select{a namespace enclosing %2|"
"class %2 or an enclosing namespace}3">;
def ext_ms_template_spec_redecl_out_of_scope: ExtWarn<
"%select{class template|class template partial|variable template|"
"variable template partial|function template|member "
"function|static data member|member class|member enumeration}0 "
"specialization of %1 not in %select{a namespace enclosing %2|"
"class %2 or an enclosing namespace}3 "
"is a Microsoft extension">, InGroup<MicrosoftTemplate>;
def err_template_spec_redecl_global_scope : Error<
"%select{class template|class template partial|variable template|"
"variable template partial|function template|member "
"function|static data member|member class|member enumeration}0 "
"specialization of %1 must occur at global scope">;
def err_spec_member_not_instantiated : Error<
"specialization of member %q0 does not specialize an instantiated member">;
def note_specialized_decl : Note<"attempt to specialize declaration here">;
def err_specialization_after_instantiation : Error<
"explicit specialization of %0 after instantiation">;
def note_instantiation_required_here : Note<
"%select{implicit|explicit}0 instantiation first required here">;
def err_template_spec_friend : Error<
"template specialization declaration cannot be a friend">;
def err_template_spec_default_arg : Error<
"default argument not permitted on an explicit "
"%select{instantiation|specialization}0 of function %1">;
def err_not_class_template_specialization : Error<
"cannot specialize a %select{dependent template|template template "
"parameter}0">;
def ext_explicit_specialization_storage_class : ExtWarn<
"explicit specialization cannot have a storage class">;
def err_explicit_specialization_inconsistent_storage_class : Error<
"explicit specialization has extraneous, inconsistent storage class "
"'%select{none|extern|static|__private_extern__|auto|register}0'">;
def err_dependent_function_template_spec_no_match : Error<
"no candidate function template was found for dependent"
" friend function template specialization">;
def note_dependent_function_template_spec_discard_reason : Note<
"candidate ignored: %select{not a function template"
"|not a member of the enclosing namespace;"
" did you mean to explicitly qualify the specialization?}0">;
// C++ class template specializations and out-of-line definitions
def err_template_spec_needs_header : Error<
"template specialization requires 'template<>'">;
def err_template_spec_needs_template_parameters : Error<
"template specialization or definition requires a template parameter list "
"corresponding to the nested type %0">;
def err_template_param_list_matches_nontemplate : Error<
"template parameter list matching the non-templated nested type %0 should "
"be empty ('template<>')">;
def err_alias_template_extra_headers : Error<
"extraneous template parameter list in alias template declaration">;
def err_template_spec_extra_headers : Error<
"extraneous template parameter list in template specialization or "
"out-of-line template definition">;
def warn_template_spec_extra_headers : Warning<
"extraneous template parameter list in template specialization">;
def note_explicit_template_spec_does_not_need_header : Note<
"'template<>' header not required for explicitly-specialized class %0 "
"declared here">;
def err_template_qualified_declarator_no_match : Error<
"nested name specifier '%0' for declaration does not refer into a class, "
"class template or class template partial specialization">;
def err_specialize_member_of_template : Error<
"cannot specialize %select{|(with 'template<>') }0a member of an "
"unspecialized template">;
// C++ Class Template Partial Specialization
def err_default_arg_in_partial_spec : Error<
"default template argument in a class template partial specialization">;
def err_dependent_non_type_arg_in_partial_spec : Error<
"type of specialized non-type template argument depends on a template "
"parameter of the partial specialization">;
def note_dependent_non_type_default_arg_in_partial_spec : Note<
"template parameter is used in default argument declared here">;
def err_dependent_typed_non_type_arg_in_partial_spec : Error<
"non-type template argument specializes a template parameter with "
"dependent type %0">;
def err_partial_spec_args_match_primary_template : Error<
"%select{class|variable}0 template partial specialization does not "
"specialize any template argument; to %select{declare|define}1 the "
"primary template, remove the template argument list">;
def ext_partial_spec_not_more_specialized_than_primary : ExtWarn<
"%select{class|variable}0 template partial specialization is not "
"more specialized than the primary template">, DefaultError,
InGroup<DiagGroup<"invalid-partial-specialization">>;
def note_partial_spec_not_more_specialized_than_primary : Note<"%0">;
def ext_partial_specs_not_deducible : ExtWarn<
"%select{class|variable}0 template partial specialization contains "
"%select{a template parameter|template parameters}1 that cannot be "
"deduced; this partial specialization will never be used">,
DefaultError, InGroup<DiagGroup<"unusable-partial-specialization">>;
def note_non_deducible_parameter : Note<
"non-deducible template parameter %0">;
def err_partial_spec_ordering_ambiguous : Error<
"ambiguous partial specializations of %0">;
def note_partial_spec_match : Note<"partial specialization matches %0">;
def err_partial_spec_redeclared : Error<
"class template partial specialization %0 cannot be redeclared">;
def note_prev_partial_spec_here : Note<
"previous declaration of class template partial specialization %0 is here">;
def err_partial_spec_fully_specialized : Error<
"partial specialization of %0 does not use any of its template parameters">;
// C++ Variable Template Partial Specialization
def err_var_partial_spec_redeclared : Error<
"variable template partial specialization %0 cannot be redefined">;
def note_var_prev_partial_spec_here : Note<
"previous declaration of variable template partial specialization is here">;
def err_var_spec_no_template : Error<
"no variable template matches%select{| partial}0 specialization">;
def err_var_spec_no_template_but_method : Error<
"no variable template matches specialization; "
"did you mean to use %0 as function template instead?">;
// C++ Function template specializations
def err_function_template_spec_no_match : Error<
"no function template matches function template specialization %0">;
def err_function_template_spec_ambiguous : Error<
"function template specialization %0 ambiguously refers to more than one "
"function template; explicitly specify%select{| additional}1 template "
"arguments to identify a particular function template">;
def note_function_template_spec_matched : Note<
"function template %q0 matches specialization %1">;
def err_function_template_partial_spec : Error<
"function template partial specialization is not allowed">;
// C++ Template Instantiation
def err_template_recursion_depth_exceeded : Error<
"recursive template instantiation exceeded maximum depth of %0">,
DefaultFatal, NoSFINAE;
def err_constraint_depends_on_self : Error<
"satisfaction of constraint '%0' depends on itself">, NoSFINAE;
def note_template_recursion_depth : Note<
"use -ftemplate-depth=N to increase recursive template instantiation depth">;
def err_template_instantiate_within_definition : Error<
"%select{implicit|explicit}0 instantiation of template %1 within its"
" own definition">;
def err_template_instantiate_undefined : Error<
"%select{implicit|explicit}0 instantiation of undefined template %1">;
def err_implicit_instantiate_member_undefined : Error<
"implicit instantiation of undefined member %0">;
def note_template_class_instantiation_was_here : Note<
"class template %0 was instantiated here">;
def note_template_class_explicit_specialization_was_here : Note<
"class template %0 was explicitly specialized here">;
def note_template_class_instantiation_here : Note<
"in instantiation of template class %q0 requested here">;
def note_template_member_class_here : Note<
"in instantiation of member class %q0 requested here">;
def note_template_member_function_here : Note<
"in instantiation of member function %q0 requested here">;
def note_function_template_spec_here : Note<
"in instantiation of function template specialization %q0 requested here">;
def note_template_static_data_member_def_here : Note<
"in instantiation of static data member %q0 requested here">;
def note_template_variable_def_here : Note<
"in instantiation of variable template specialization %q0 requested here">;
def note_template_enum_def_here : Note<
"in instantiation of enumeration %q0 requested here">;
def note_template_nsdmi_here : Note<
"in instantiation of default member initializer %q0 requested here">;
def note_template_type_alias_instantiation_here : Note<
"in instantiation of template type alias %0 requested here">;
def note_template_exception_spec_instantiation_here : Note<
"in instantiation of exception specification for %0 requested here">;
def note_template_requirement_instantiation_here : Note<
"in instantiation of requirement here">;
def note_template_requirement_params_instantiation_here : Note<
"in instantiation of requirement parameters here">;
def warn_var_template_missing : Warning<"instantiation of variable %q0 "
"required here, but no definition is available">,
InGroup<UndefinedVarTemplate>;
def warn_func_template_missing : Warning<"instantiation of function %q0 "
"required here, but no definition is available">,
InGroup<UndefinedFuncTemplate>, DefaultIgnore;
def note_forward_template_decl : Note<
"forward declaration of template entity is here">;
def note_inst_declaration_hint : Note<"add an explicit instantiation "
"declaration to suppress this warning if %q0 is explicitly instantiated in "
"another translation unit">;
def note_evaluating_exception_spec_here : Note<
"in evaluation of exception specification for %q0 needed here">;
def note_default_arg_instantiation_here : Note<
"in instantiation of default argument for '%0' required here">;
def note_default_function_arg_instantiation_here : Note<
"in instantiation of default function argument expression "
"for '%0' required here">;
def note_explicit_template_arg_substitution_here : Note<
"while substituting explicitly-specified template arguments into function "
"template %0 %1">;
def note_function_template_deduction_instantiation_here : Note<
"while substituting deduced template arguments into function template %0 "
"%1">;
def note_deduced_template_arg_substitution_here : Note<
"during template argument deduction for %select{class|variable}0 template "
"%select{partial specialization |}1%2 %3">;
def note_prior_template_arg_substitution : Note<
"while substituting prior template arguments into %select{non-type|template}0"
" template parameter%1 %2">;
def note_template_default_arg_checking : Note<
"while checking a default template argument used here">;
def note_concept_specialization_here : Note<
"while checking the satisfaction of concept '%0' requested here">;
def note_nested_requirement_here : Note<
"while checking the satisfaction of nested requirement requested here">;
def note_checking_constraints_for_template_id_here : Note<
"while checking constraint satisfaction for template '%0' required here">;
def note_checking_constraints_for_var_spec_id_here : Note<
"while checking constraint satisfaction for variable template "
"partial specialization '%0' required here">;
def note_checking_constraints_for_class_spec_id_here : Note<
"while checking constraint satisfaction for class template partial "
"specialization '%0' required here">;
def note_checking_constraints_for_function_here : Note<
"while checking constraint satisfaction for function '%0' required here">;
def note_constraint_substitution_here : Note<
"while substituting template arguments into constraint expression here">;
def note_constraint_normalization_here : Note<
"while calculating associated constraint of template '%0' here">;
def note_parameter_mapping_substitution_here : Note<
"while substituting into concept arguments here; substitution failures not "
"allowed in concept arguments">;
def note_instantiation_contexts_suppressed : Note<
"(skipping %0 context%s0 in backtrace; use -ftemplate-backtrace-limit=0 to "
"see all)">;
def err_field_instantiates_to_function : Error<
"data member instantiated with function type %0">;
def err_variable_instantiates_to_function : Error<
"%select{variable|static data member}0 instantiated with function type %1">;
def err_nested_name_spec_non_tag : Error<
"type %0 cannot be used prior to '::' because it has no members">;
def err_using_pack_expansion_empty : Error<
"%select{|member}0 using declaration %1 instantiates to an empty pack">;
// C++ Explicit Instantiation
def err_explicit_instantiation_duplicate : Error<
"duplicate explicit instantiation of %0">;
def ext_explicit_instantiation_duplicate : ExtWarn<
"duplicate explicit instantiation of %0 ignored as a Microsoft extension">,
InGroup<MicrosoftTemplate>;
def note_previous_explicit_instantiation : Note<
"previous explicit instantiation is here">;
def warn_explicit_instantiation_after_specialization : Warning<
"explicit instantiation of %0 that occurs after an explicit "
"specialization has no effect">,
InGroup<DiagGroup<"instantiation-after-specialization">>;
def note_previous_template_specialization : Note<
"previous template specialization is here">;
def err_explicit_instantiation_nontemplate_type : Error<
"explicit instantiation of non-templated type %0">;
def note_nontemplate_decl_here : Note<
"non-templated declaration is here">;
def err_explicit_instantiation_in_class : Error<
"explicit instantiation of %0 in class scope">;
def err_explicit_instantiation_out_of_scope : Error<
"explicit instantiation of %0 not in a namespace enclosing %1">;
def err_explicit_instantiation_must_be_global : Error<
"explicit instantiation of %0 must occur at global scope">;
def warn_explicit_instantiation_out_of_scope_0x : Warning<
"explicit instantiation of %0 not in a namespace enclosing %1">,
InGroup<CXX11Compat>, DefaultIgnore;
def warn_explicit_instantiation_must_be_global_0x : Warning<
"explicit instantiation of %0 must occur at global scope">,
InGroup<CXX11Compat>, DefaultIgnore;
def err_explicit_instantiation_requires_name : Error<
"explicit instantiation declaration requires a name">;
def err_explicit_instantiation_of_typedef : Error<
"explicit instantiation of typedef %0">;
def err_explicit_instantiation_storage_class : Error<
"explicit instantiation cannot have a storage class">;
def err_explicit_instantiation_internal_linkage : Error<
"explicit instantiation declaration of %0 with internal linkage">;
def err_explicit_instantiation_not_known : Error<
"explicit instantiation of %0 does not refer to a function template, "
"variable template, member function, member class, or static data member">;
def note_explicit_instantiation_here : Note<
"explicit instantiation refers here">;
def err_explicit_instantiation_data_member_not_instantiated : Error<
"explicit instantiation refers to static data member %q0 that is not an "
"instantiation">;
def err_explicit_instantiation_member_function_not_instantiated : Error<
"explicit instantiation refers to member function %q0 that is not an "
"instantiation">;
def err_explicit_instantiation_ambiguous : Error<
"partial ordering for explicit instantiation of %0 is ambiguous">;
def note_explicit_instantiation_candidate : Note<
"explicit instantiation candidate function %q0 template here %1">;
def err_explicit_instantiation_inline : Error<
"explicit instantiation cannot be 'inline'">;
def warn_explicit_instantiation_inline_0x : Warning<
"explicit instantiation cannot be 'inline'">, InGroup<CXX11Compat>,
DefaultIgnore;
def err_explicit_instantiation_constexpr : Error<
"explicit instantiation cannot be 'constexpr'">;
def ext_explicit_instantiation_without_qualified_id : Extension<
"qualifier in explicit instantiation of %q0 requires a template-id "
"(a typedef is not permitted)">;
def err_explicit_instantiation_without_template_id : Error<
"explicit instantiation of %q0 must specify a template argument list">;
def err_explicit_instantiation_unqualified_wrong_namespace : Error<
"explicit instantiation of %q0 must occur in namespace %1">;
def warn_explicit_instantiation_unqualified_wrong_namespace_0x : Warning<
"explicit instantiation of %q0 must occur in namespace %1">,
InGroup<CXX11Compat>, DefaultIgnore;
def err_explicit_instantiation_undefined_member : Error<
"explicit instantiation of undefined %select{member class|member function|"
"static data member}0 %1 of class template %2">;
def err_explicit_instantiation_undefined_func_template : Error<
"explicit instantiation of undefined function template %0">;
def err_explicit_instantiation_undefined_var_template : Error<
"explicit instantiation of undefined variable template %q0">;
def err_explicit_instantiation_declaration_after_definition : Error<
"explicit instantiation declaration (with 'extern') follows explicit "
"instantiation definition (without 'extern')">;
def note_explicit_instantiation_definition_here : Note<
"explicit instantiation definition is here">;
def err_invalid_var_template_spec_type : Error<"type %2 "
"of %select{explicit instantiation|explicit specialization|"
"partial specialization|redeclaration}0 of %1 does not match"
" expected type %3">;
def err_mismatched_exception_spec_explicit_instantiation : Error<
"exception specification in explicit instantiation does not match "
"instantiated one">;
def ext_mismatched_exception_spec_explicit_instantiation : ExtWarn<
err_mismatched_exception_spec_explicit_instantiation.Summary>,
InGroup<MicrosoftExceptionSpec>;
def err_explicit_instantiation_dependent : Error<
"explicit instantiation has dependent template arguments">;
// C++ typename-specifiers
def err_typename_nested_not_found : Error<"no type named %0 in %1">;
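// Hedged illustration (not part of the original file):
//   struct S {};
//   template <typename T> void f() {
//     typename T::type t; // with T = S: no type named 'type' in 'S'
//   }
//   void g() { f<S>(); }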
def err_typename_nested_not_found_enable_if : Error<
"no type named 'type' in %0; 'enable_if' cannot be used to disable "
"this declaration">;
def err_typename_nested_not_found_requirement : Error<
"failed requirement '%0'; 'enable_if' cannot be used to disable this "
"declaration">;
def err_typename_nested_not_type : Error<
"typename specifier refers to non-type member %0 in %1">;
def err_typename_not_type : Error<
"typename specifier refers to non-type %0">;
def note_typename_member_refers_here : Note<
"referenced member %0 is declared here">;
def note_typename_refers_here : Note<
"referenced %0 is declared here">;
def err_typename_missing : Error<
"missing 'typename' prior to dependent type name '%0%1'">;
def err_typename_missing_template : Error<
"missing 'typename' prior to dependent type template name '%0%1'">;
def ext_typename_missing : ExtWarn<
"missing 'typename' prior to dependent type name '%0%1'">,
InGroup<DiagGroup<"typename-missing">>;
def ext_typename_outside_of_template : ExtWarn<
"'typename' occurs outside of a template">, InGroup<CXX11>;
def warn_cxx98_compat_typename_outside_of_template : Warning<
"use of 'typename' outside of a template is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_typename_refers_to_using_value_decl : Error<
"typename specifier refers to a dependent using declaration for a value "
"%0 in %1">;
def note_using_value_decl_missing_typename : Note<
"add 'typename' to treat this using declaration as a type">;
def warn_cxx17_compat_implicit_typename : Warning<"use of implicit 'typename' is "
"incompatible with C++ standards before C++20">, InGroup<CXX20Compat>,
DefaultIgnore;
def ext_implicit_typename : ExtWarn<"missing 'typename' prior to dependent "
"type name %0%1; implicit 'typename' is a C++20 extension">,
InGroup<CXX20>;
def err_template_kw_refers_to_non_template : Error<
"%0%select{| following the 'template' keyword}1 "
"does not refer to a template">;
def note_template_kw_refers_to_non_template : Note<
"declared as a non-template here">;
def err_template_kw_refers_to_dependent_non_template : Error<
"%0%select{| following the 'template' keyword}1 "
"cannot refer to a dependent template">;
def err_template_kw_refers_to_class_template : Error<
"'%0%1' instantiated to a class template, not a function template">;
def note_referenced_class_template : Note<
"class template declared here">;
def err_template_kw_missing : Error<
"missing 'template' keyword prior to dependent template name '%0%1'">;
def ext_template_outside_of_template : ExtWarn<
"'template' keyword outside of a template">, InGroup<CXX11>;
def warn_cxx98_compat_template_outside_of_template : Warning<
"use of 'template' keyword outside of a template is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_non_type_template_in_nested_name_specifier : Error<
"qualified name refers into a specialization of %select{function|variable}0 "
"template %1">;
def err_template_id_not_a_type : Error<
"template name refers to non-type template %0">;
def note_template_declared_here : Note<
"%select{function template|class template|variable template"
"|type alias template|template template parameter}0 "
"%1 declared here">;
def err_template_expansion_into_fixed_list : Error<
"pack expansion used as argument for non-pack parameter of %select{alias "
"template|concept}0">;
def note_parameter_type : Note<
"parameter of type %0 is declared here">;
// C++11 Variadic Templates
def err_template_param_pack_default_arg : Error<
"template parameter pack cannot have a default argument">;
def err_template_param_pack_must_be_last_template_parameter : Error<
"template parameter pack must be the last template parameter">;
def err_template_parameter_pack_non_pack : Error<
"%select{template type|non-type template|template template}0 parameter"
"%select{| pack}1 conflicts with previous %select{template type|"
"non-type template|template template}0 parameter%select{ pack|}1">;
def note_template_parameter_pack_non_pack : Note<
"%select{template type|non-type template|template template}0 parameter"
"%select{| pack}1 does not match %select{template type|non-type template"
"|template template}0 parameter%select{ pack|}1 in template argument">;
def note_template_parameter_pack_here : Note<
"previous %select{template type|non-type template|template template}0 "
"parameter%select{| pack}1 declared here">;
def err_unexpanded_parameter_pack : Error<
"%select{expression|base type|declaration type|data member type|bit-field "
"size|static assertion|fixed underlying type|enumerator value|"
"using declaration|friend declaration|qualifier|initializer|default argument|"
"non-type template parameter type|exception type|partial specialization|"
"__if_exists name|__if_not_exists name|lambda|block|type constraint|"
"requirement|requires clause}0 "
"contains%plural{0: an|:}1 unexpanded parameter pack"
"%plural{0:|1: %2|2:s %2 and %3|:s %2, %3, ...}1">;
def err_pack_expansion_without_parameter_packs : Error<
"pack expansion does not contain any unexpanded parameter packs">;
def err_pack_expansion_length_conflict : Error<
"pack expansion contains parameter packs %0 and %1 that have different "
"lengths (%2 vs. %3)">;
def err_pack_expansion_length_conflict_multilevel : Error<
"pack expansion contains parameter pack %0 that has a different "
"length (%1 vs. %2) from outer parameter packs">;
def err_pack_expansion_length_conflict_partial : Error<
"pack expansion contains parameter pack %0 that has a different "
"length (at least %1 vs. %2) from outer parameter packs">;
def err_pack_expansion_member_init : Error<
"pack expansion for initialization of member %0">;
def err_function_parameter_pack_without_parameter_packs : Error<
"type %0 of function parameter pack does not contain any unexpanded "
"parameter packs">;
def err_ellipsis_in_declarator_not_parameter : Error<
"only function and template parameters can be parameter packs">;
def err_sizeof_pack_no_pack_name : Error<
"%0 does not refer to the name of a parameter pack">;
def err_fold_expression_packs_both_sides : Error<
"binary fold expression has unexpanded parameter packs in both operands">;
def err_fold_expression_empty : Error<
"unary fold expression has empty expansion for operator '%0' "
"with no fallback value">;
def err_fold_expression_bad_operand : Error<
"expression not permitted as operand of fold expression">;
def err_fold_expression_limit_exceeded : Error<
"instantiating fold expression with %0 arguments exceeded expression nesting "
"limit of %1">, DefaultFatal, NoSFINAE;
def err_unexpected_typedef : Error<
"unexpected type name %0: expected expression">;
def err_unexpected_namespace : Error<
"unexpected namespace name %0: expected expression">;
def err_undeclared_var_use : Error<"use of undeclared identifier %0">;
def ext_undeclared_unqual_id_with_dependent_base : ExtWarn<
"use of undeclared identifier %0; "
"unqualified lookup into dependent bases of class template %1 is a Microsoft extension">,
InGroup<MicrosoftTemplate>;
def err_found_in_dependent_base : Error<
"explicit qualification required to use member %0 from dependent base class">;
def ext_found_in_dependent_base : ExtWarn<"use of member %0 "
"found via unqualified lookup into dependent bases of class templates is a "
"Microsoft extension">, InGroup<MicrosoftTemplate>;
def err_found_later_in_class : Error<"member %0 used before its declaration">;
def ext_found_later_in_class : ExtWarn<
"use of member %0 before its declaration is a Microsoft extension">,
InGroup<MicrosoftTemplate>;
def ext_unqualified_base_class : ExtWarn<
"unqualified base initializer of class templates is a Microsoft extension">,
InGroup<MicrosoftTemplate>;
def note_dependent_member_use : Note<
"must qualify identifier to find this declaration in dependent base class">;
def err_not_found_by_two_phase_lookup : Error<"call to function %0 that is neither "
"visible in the template definition nor found by argument-dependent lookup">;
def note_not_found_by_two_phase_lookup : Note<"%0 should be declared prior to the "
"call site%select{| or in %2| or in an associated namespace of one of its arguments}1">;
def err_undeclared_use : Error<"use of undeclared %0">;
def warn_deprecated : Warning<"%0 is deprecated">,
InGroup<DeprecatedDeclarations>;
def note_from_diagnose_if : Note<"from 'diagnose_if' attribute on %0:">;
def warn_property_method_deprecated :
Warning<"property access is using %0 method which is deprecated">,
InGroup<DeprecatedDeclarations>;
def warn_deprecated_message : Warning<"%0 is deprecated: %1">,
InGroup<DeprecatedDeclarations>;
def warn_deprecated_anonymous_namespace : Warning<
"'deprecated' attribute on anonymous namespace ignored">,
InGroup<IgnoredAttributes>;
def warn_deprecated_fwdclass_message : Warning<
"%0 may be deprecated because the receiver type is unknown">,
InGroup<DeprecatedDeclarations>;
def warn_deprecated_def : Warning<
"implementing deprecated %select{method|class|category}0">,
InGroup<DeprecatedImplementations>, DefaultIgnore;
def warn_unavailable_def : Warning<
"implementing unavailable method">,
InGroup<DeprecatedImplementations>, DefaultIgnore;
def warn_deprecated_builtin : Warning<
"builtin %0 is deprecated; use %1 instead">,
InGroup<DeprecatedBuiltins>;
def err_unavailable : Error<"%0 is unavailable">;
def err_property_method_unavailable :
Error<"property access is using %0 method which is unavailable">;
def err_unavailable_message : Error<"%0 is unavailable: %1">;
def warn_unavailable_fwdclass_message : Warning<
"%0 may be unavailable because the receiver type is unknown">,
InGroup<UnavailableDeclarations>;
def note_availability_specified_here : Note<
"%0 has been explicitly marked "
"%select{unavailable|deleted|deprecated}1 here">;
def note_partial_availability_specified_here : Note<
"%0 has been marked as being introduced in %1 %2 here, "
"but the deployment target is %1 %3">;
def note_implicitly_deleted : Note<
"explicitly defaulted function was implicitly deleted here">;
def warn_not_enough_argument : Warning<
"not enough variable arguments in %0 declaration to fit a sentinel">,
InGroup<Sentinel>;
def warn_missing_sentinel : Warning<
"missing sentinel in %select{function call|method dispatch|block call}0">,
InGroup<Sentinel>;
def note_sentinel_here : Note<
"%select{function|method|block}0 has been explicitly marked sentinel here">;
def warn_strict_uses_without_prototype : Warning<
"passing arguments to %select{a function|%1}0 without a prototype is "
"deprecated in all versions of C and is not supported in C2x">,
InGroup<DeprecatedNonPrototype>;
def warn_missing_prototype : Warning<
"no previous prototype for function %0">,
InGroup<DiagGroup<"missing-prototypes">>, DefaultIgnore;
def note_declaration_not_a_prototype : Note<
"this declaration is not a prototype; add %select{'void'|parameter declarations}0 "
"to make it %select{a prototype for a zero-parameter function|one}0">;
// This is not actually an extension, but we only want it to be enabled in
// -pedantic mode and this is the most direct way of accomplishing that.
def warn_strict_prototypes : Extension<
"a %select{function|block}0 declaration without a prototype is deprecated "
"%select{in all versions of C|}0">, InGroup<StrictPrototypes>;
def warn_non_prototype_changes_behavior : Warning<
"a function %select{declaration|definition}0 without a prototype is "
"deprecated in all versions of C %select{and is not supported in C2x|and is "
"treated as a zero-parameter prototype in C2x, conflicting with a "
"%select{previous|subsequent}2 %select{declaration|definition}3}1">,
InGroup<DeprecatedNonPrototype>;
def note_conflicting_prototype : Note<"conflicting prototype is here">;
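// Hedged C illustrations for the prototype diagnostics above:
//   void f();        // declaration without a prototype (-Wstrict-prototypes)
//   void f(int x) {} // C2x treats 'void f()' as 'void f(void)', conflicting
//                    // with this definition (-Wdeprecated-non-prototype)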
def warn_missing_variable_declarations : Warning<
"no previous extern declaration for non-static variable %0">,
InGroup<DiagGroup<"missing-variable-declarations">>, DefaultIgnore;
def note_static_for_internal_linkage : Note<
"declare 'static' if the %select{variable|function}0 is not intended to be "
"used outside of this translation unit">;
def err_static_data_member_reinitialization :
Error<"static data member %0 already has an initializer">;
def err_redefinition : Error<"redefinition of %0">;
def err_alias_after_tentative :
Error<"alias definition of %0 after tentative definition">;
def err_alias_is_definition :
Error<"definition %0 cannot also be an %select{alias|ifunc}1">;
def err_definition_of_implicitly_declared_member : Error<
"definition of implicitly declared %select{default constructor|copy "
"constructor|move constructor|copy assignment operator|move assignment "
"operator|destructor|function}1">;
def err_definition_of_explicitly_defaulted_member : Error<
"definition of explicitly defaulted %select{default constructor|copy "
"constructor|move constructor|copy assignment operator|move assignment "
"operator|destructor|function}0">;
def err_redefinition_extern_inline : Error<
  "redefinition of an 'extern inline' function %0 is not supported in "
  "%select{C99 mode|C++}1">;
def warn_attr_abi_tag_namespace : Warning<
"'abi_tag' attribute on %select{non-inline|anonymous}0 namespace ignored">,
InGroup<IgnoredAttributes>;
def err_abi_tag_on_redeclaration : Error<
"cannot add 'abi_tag' attribute in a redeclaration">;
def err_new_abi_tag_on_redeclaration : Error<
"'abi_tag' %0 missing in original declaration">;
def note_use_ifdef_guards : Note<
"unguarded header; consider using #ifdef guards or #pragma once">;
def warn_var_decl_not_read_only : Warning<
"object of type %0 cannot be placed in read-only memory">,
InGroup<ReadOnlyPlacementChecks>;
def note_enforce_read_only_placement : Note<"type was declared read-only here">;
def note_deleted_dtor_no_operator_delete : Note<
"virtual destructor requires an unambiguous, accessible 'operator delete'">;
def note_deleted_special_member_class_subobject : Note<
"%select{default constructor of|copy constructor of|move constructor of|"
"copy assignment operator of|move assignment operator of|destructor of|"
"constructor inherited by}0 "
"%1 is implicitly deleted because "
"%select{base class %3|%select{||||variant }4field %3}2 "
"%select{has "
"%select{no|a deleted|multiple|an inaccessible|a non-trivial}4 "
"%select{%select{default constructor|copy constructor|move constructor|copy "
"assignment operator|move assignment operator|destructor|"
"%select{default|corresponding|default|default|default}4 constructor}0|"
"destructor}5"
"%select{||s||}4"
"|is an ObjC pointer}6">;
def note_deleted_default_ctor_uninit_field : Note<
"%select{default constructor of|constructor inherited by}0 "
"%1 is implicitly deleted because field %2 of "
"%select{reference|const-qualified}4 type %3 would not be initialized">;
def note_deleted_default_ctor_all_const : Note<
"%select{default constructor of|constructor inherited by}0 "
"%1 is implicitly deleted because all "
"%select{data members|data members of an anonymous union member}2"
" are const-qualified">;
def note_deleted_copy_ctor_rvalue_reference : Note<
"copy constructor of %0 is implicitly deleted because field %1 is of "
"rvalue reference type %2">;
def note_deleted_copy_user_declared_move : Note<
"copy %select{constructor|assignment operator}0 is implicitly deleted because"
" %1 has a user-declared move %select{constructor|assignment operator}2">;
def note_deleted_assign_field : Note<
"%select{copy|move}0 assignment operator of %1 is implicitly deleted "
"because field %2 is of %select{reference|const-qualified}4 type %3">;
// These should be errors.
def warn_undefined_internal : Warning<
"%select{function|variable}0 %q1 has internal linkage but is not defined">,
InGroup<DiagGroup<"undefined-internal">>;
def err_undefined_internal_type : Error<
"%select{function|variable}0 %q1 is used but not defined in this "
"translation unit, and cannot be defined in any other translation unit "
"because its type does not have linkage">;
def ext_undefined_internal_type : Extension<
"ISO C++ requires a definition in this translation unit for "
"%select{function|variable}0 %q1 because its type does not have linkage">,
InGroup<DiagGroup<"undefined-internal-type">>;
def warn_undefined_inline : Warning<"inline function %q0 is not defined">,
InGroup<DiagGroup<"undefined-inline">>;
def err_undefined_inline_var : Error<"inline variable %q0 is not defined">;
def note_used_here : Note<"used here">;
def err_attribute_missing_on_first_decl : Error<
"%0 attribute does not appear on the first declaration">;
def warn_internal_linkage_local_storage : Warning<
"'internal_linkage' attribute on a non-static local variable is ignored">,
InGroup<IgnoredAttributes>;
def ext_internal_in_extern_inline : ExtWarn<
"static %select{function|variable}0 %1 is used in an inline function with "
"external linkage">, InGroup<StaticInInline>;
def ext_internal_in_extern_inline_quiet : Extension<
"static %select{function|variable}0 %1 is used in an inline function with "
"external linkage">, InGroup<StaticInInline>;
def warn_static_local_in_extern_inline : Warning<
"non-constant static local variable in inline function may be different "
"in different files">, InGroup<StaticLocalInInline>;
def note_convert_inline_to_static : Note<
"use 'static' to give inline function %0 internal linkage">;
def ext_redefinition_of_typedef : ExtWarn<
"redefinition of typedef %0 is a C11 feature">,
InGroup<DiagGroup<"typedef-redefinition"> >;
def err_redefinition_variably_modified_typedef : Error<
"redefinition of %select{typedef|type alias}0 for variably-modified type %1">;
def err_inline_decl_follows_def : Error<
"inline declaration of %0 follows non-inline definition">;
def err_inline_declaration_block_scope : Error<
"inline declaration of %0 not allowed in block scope">;
def err_static_non_static : Error<
"static declaration of %0 follows non-static declaration">;
def err_different_language_linkage : Error<
"declaration of %0 has a different language linkage">;
def ext_retained_language_linkage : Extension<
"friend function %0 retaining previous language linkage is an extension">,
InGroup<DiagGroup<"retained-language-linkage">>;
def err_extern_c_global_conflict : Error<
"declaration of %1 %select{with C language linkage|in global scope}0 "
"conflicts with declaration %select{in global scope|with C language linkage}0">;
def note_extern_c_global_conflict : Note<
"declared %select{in global scope|with C language linkage}0 here">;
def note_extern_c_begins_here : Note<
"extern \"C\" language linkage specification begins here">;
def warn_weak_import : Warning<
  "already-declared variable %0 is made a weak_import declaration">;
def ext_static_non_static : Extension<
"redeclaring non-static %0 as static is a Microsoft extension">,
InGroup<MicrosoftRedeclareStatic>;
def err_non_static_static : Error<
"non-static declaration of %0 follows static declaration">;
def err_extern_non_extern : Error<
"extern declaration of %0 follows non-extern declaration">;
def err_non_extern_extern : Error<
"non-extern declaration of %0 follows extern declaration">;
def err_non_thread_thread : Error<
"non-thread-local declaration of %0 follows thread-local declaration">;
def err_thread_non_thread : Error<
"thread-local declaration of %0 follows non-thread-local declaration">;
def err_thread_thread_different_kind : Error<
"thread-local declaration of %0 with %select{static|dynamic}1 initialization "
"follows declaration with %select{dynamic|static}1 initialization">;
def err_mismatched_owning_module : Error<
"declaration of %0 in %select{the global module|module %2}1 follows "
"declaration in %select{the global module|module %4}3">;
def err_redefinition_different_type : Error<
"redefinition of %0 with a different type%diff{: $ vs $|}1,2">;
def err_redefinition_different_kind : Error<
"redefinition of %0 as different kind of symbol">;
def err_redefinition_different_namespace_alias : Error<
"redefinition of %0 as an alias for a different namespace">;
def note_previous_namespace_alias : Note<
"previously defined as an alias for %0">;
def warn_forward_class_redefinition : Warning<
  "forward class declaration %0 that redefines a typedef name of an object "
  "type is ignored">,
  InGroup<DiagGroup<"objc-forward-class-redefinition">>;
def err_redefinition_different_typedef : Error<
"%select{typedef|type alias|type alias template}0 "
"redefinition with different types%diff{ ($ vs $)|}1,2">;
def err_redefinition_different_concept : Error<
"redefinition of concept %0 with different template parameters or requirements">;
def err_tag_reference_non_tag : Error<
"%select{non-struct type|non-class type|non-union type|non-enum "
"type|typedef|type alias|template|type alias template|template "
"template argument}1 %0 cannot be referenced with a "
"%select{struct|interface|union|class|enum}2 specifier">;
def err_tag_reference_conflict : Error<
"implicit declaration introduced by elaborated type conflicts with a "
"%select{non-struct type|non-class type|non-union type|non-enum "
"type|typedef|type alias|template|type alias template|template "
"template argument}0 of the same name">;
def err_dependent_tag_decl : Error<
"%select{declaration|definition}0 of "
"%select{struct|interface|union|class|enum}1 in a dependent scope">;
def err_tag_definition_of_typedef : Error<
"definition of type %0 conflicts with %select{typedef|type alias}1 of the same name">;
def err_conflicting_types : Error<"conflicting types for %0">;
def err_different_pass_object_size_params : Error<
"conflicting pass_object_size attributes on parameters">;
def err_late_asm_label_name : Error<
"cannot apply asm label to %select{variable|function}0 after its first use">;
def err_different_asm_label : Error<"conflicting asm label">;
def err_nested_redefinition : Error<"nested redefinition of %0">;
def err_use_with_wrong_tag : Error<
"use of %0 with tag type that does not match previous declaration">;
def warn_struct_class_tag_mismatch : Warning<
"%select{struct|interface|class}0%select{| template}1 %2 was previously "
"declared as a %select{struct|interface|class}3%select{| template}1; "
"this is valid, but may result in linker errors under the Microsoft C++ ABI">,
InGroup<MismatchedTags>, DefaultIgnore;
def warn_struct_class_previous_tag_mismatch : Warning<
"%2 defined as %select{a struct|an interface|a class}0%select{| template}1 "
"here but previously declared as "
"%select{a struct|an interface|a class}3%select{| template}1; "
"this is valid, but may result in linker errors under the Microsoft C++ ABI">,
InGroup<MismatchedTags>, DefaultIgnore;
def note_struct_class_suggestion : Note<
"did you mean %select{struct|interface|class}0 here?">;
def ext_forward_ref_enum : Extension<
"ISO C forbids forward references to 'enum' types">;
def err_forward_ref_enum : Error<
"ISO C++ forbids forward references to 'enum' types">;
def ext_ms_forward_ref_enum : ExtWarn<
"forward references to 'enum' types are a Microsoft extension">,
InGroup<MicrosoftEnumForwardReference>;
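// Hedged illustration:
//   enum E; // C: ISO C forbids forward references to 'enum' types;
//           // C++: error (an opaque declaration needs 'enum E : int;')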
def ext_forward_ref_enum_def : Extension<
"redeclaration of already-defined enum %0 is a GNU extension">,
InGroup<GNURedeclaredEnum>;
def err_redefinition_of_enumerator : Error<"redefinition of enumerator %0">;
def err_duplicate_member : Error<"duplicate member %0">;
def err_misplaced_ivar : Error<
"instance variables may not be placed in %select{categories|class extension}0">;
def warn_ivars_in_interface : Warning<
"declaration of instance variables in the interface is deprecated">,
InGroup<DiagGroup<"objc-interface-ivars">>, DefaultIgnore;
def ext_enum_value_not_int : Extension<
"ISO C restricts enumerator values to range of 'int' (%0 is too "
"%select{small|large}1)">;
def ext_enum_too_large : ExtWarn<
"enumeration values exceed range of largest integer">, InGroup<EnumTooLarge>;
def ext_enumerator_increment_too_large : ExtWarn<
"incremented enumerator value %0 is not representable in the "
"largest integer type">, InGroup<EnumTooLarge>;
def warn_flag_enum_constant_out_of_range : Warning<
"enumeration value %0 is out of range of flags in enumeration type %1">,
InGroup<FlagEnum>;
def err_vm_decl_in_file_scope : Error<
"variably modified type declaration not allowed at file scope">;
def err_vm_decl_has_extern_linkage : Error<
"variably modified type declaration cannot have 'extern' linkage">;
def err_typecheck_field_variable_size : Error<
"fields must have a constant size: 'variable length array in structure' "
"extension will never be supported">;
def err_vm_func_decl : Error<
"function declaration cannot have variably modified type">;
def err_array_too_large : Error<
"array is too large (%0 elements)">;
def err_typecheck_negative_array_size : Error<"array size is negative">;
def warn_typecheck_function_qualifiers_ignored : Warning<
"'%0' qualifier on function type %1 has no effect">,
InGroup<IgnoredQualifiers>;
def warn_typecheck_function_qualifiers_unspecified : Warning<
"'%0' qualifier on function type %1 has unspecified behavior">;
def warn_typecheck_reference_qualifiers : Warning<
"'%0' qualifier on reference type %1 has no effect">,
InGroup<IgnoredReferenceQualifiers>;
def err_typecheck_invalid_restrict_not_pointer : Error<
"restrict requires a pointer or reference (%0 is invalid)">;
def err_typecheck_invalid_restrict_not_pointer_noarg : Error<
"restrict requires a pointer or reference">;
def err_typecheck_invalid_restrict_invalid_pointee : Error<
"pointer to function type %0 may not be 'restrict' qualified">;
def ext_typecheck_zero_array_size : Extension<
"zero size arrays are an extension">, InGroup<ZeroLengthArray>;
def err_typecheck_zero_array_size : Error<
"zero-length arrays are not permitted in %select{C++|SYCL device code}0">;
def err_array_size_non_int : Error<"size of array has non-integer type %0">;
def err_init_element_not_constant : Error<
"initializer element is not a compile-time constant">;
def ext_aggregate_init_not_constant : Extension<
"initializer for aggregate is not a compile-time constant">, InGroup<C99>;
def err_local_cant_init : Error<
"'__local' variable cannot have an initializer">;
def err_loader_uninitialized_cant_init
: Error<"variable with 'loader_uninitialized' attribute cannot have an "
"initializer">;
def err_loader_uninitialized_trivial_ctor
: Error<"variable with 'loader_uninitialized' attribute must have a "
"trivial default constructor">;
def err_loader_uninitialized_redeclaration
: Error<"redeclaration cannot add 'loader_uninitialized' attribute">;
def err_loader_uninitialized_extern_decl
: Error<"variable %0 cannot be declared both 'extern' and with the "
"'loader_uninitialized' attribute">;
def err_block_extern_cant_init : Error<
"declaration of block scope identifier with linkage cannot have an initializer">;
def warn_extern_init : Warning<"'extern' variable has an initializer">,
InGroup<DiagGroup<"extern-initializer">>;
def err_variable_object_no_init : Error<
"variable-sized object may not be initialized">;
def err_excess_initializers : Error<
"excess elements in %select{array|vector|scalar|union|struct}0 initializer">;
def ext_excess_initializers : ExtWarn<
"excess elements in %select{array|vector|scalar|union|struct}0 initializer">,
InGroup<ExcessInitializers>;
def err_excess_initializers_for_sizeless_type : Error<
"excess elements in initializer for indivisible sizeless type %0">;
def ext_excess_initializers_for_sizeless_type : ExtWarn<
"excess elements in initializer for indivisible sizeless type %0">,
InGroup<ExcessInitializers>;
def err_excess_initializers_in_char_array_initializer : Error<
"excess elements in char array initializer">;
def ext_excess_initializers_in_char_array_initializer : ExtWarn<
"excess elements in char array initializer">,
InGroup<ExcessInitializers>;
def err_initializer_string_for_char_array_too_long : Error<
  "initializer-string for char array is too long, array size is %0 but "
  "initializer has size %1 (including the null terminating character)">;
def ext_initializer_string_for_char_array_too_long : ExtWarn<
"initializer-string for char array is too long">,
InGroup<ExcessInitializers>;
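// Hedged illustrations:
//   char a[4] = "abcd"; // C++ error: array size is 4 but initializer has
//                       // size 5 (including the null terminating character)
//   char b[3] = "abcd"; // C: ExtWarn, initializer-string is too long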
def warn_missing_field_initializers : Warning<
"missing field %0 initializer">,
InGroup<MissingFieldInitializers>, DefaultIgnore;
def warn_braces_around_init : Warning<
"braces around %select{scalar |}0initializer">,
InGroup<DiagGroup<"braced-scalar-init">>;
def ext_many_braces_around_init : ExtWarn<
"too many braces around %select{scalar |}0initializer">,
InGroup<DiagGroup<"many-braces-around-scalar-init">>, SFINAEFailure;
def ext_complex_component_init : Extension<
"complex initialization specifying real and imaginary components "
"is an extension">, InGroup<DiagGroup<"complex-component-init">>;
def err_empty_scalar_initializer : Error<"scalar initializer cannot be empty">;
def err_empty_sizeless_initializer : Error<
"initializer for sizeless type %0 cannot be empty">;
def warn_cxx98_compat_empty_scalar_initializer : Warning<
"scalar initialized from empty initializer list is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def warn_cxx98_compat_empty_sizeless_initializer : Warning<
"initializing %0 from an empty initializer list is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def warn_cxx98_compat_reference_list_init : Warning<
"reference initialized from initializer list is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def warn_cxx98_compat_initializer_list_init : Warning<
"initialization of initializer_list object is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def warn_cxx98_compat_ctor_list_init : Warning<
"constructor call from initializer list is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_illegal_initializer : Error<
"illegal initializer (only variables can be initialized)">;
def err_illegal_initializer_type : Error<"illegal initializer type %0">;
def ext_init_list_type_narrowing : ExtWarn<
"type %0 cannot be narrowed to %1 in initializer list">,
InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure;
def ext_init_list_variable_narrowing : ExtWarn<
"non-constant-expression cannot be narrowed from type %0 to %1 in "
"initializer list">, InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure;
def ext_init_list_constant_narrowing : ExtWarn<
"constant expression evaluates to %0 which cannot be narrowed to type %1">,
InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure;
def warn_init_list_type_narrowing : Warning<
"type %0 cannot be narrowed to %1 in initializer list in C++11">,
InGroup<CXX11Narrowing>, DefaultIgnore;
def warn_init_list_variable_narrowing : Warning<
"non-constant-expression cannot be narrowed from type %0 to %1 in "
"initializer list in C++11">,
InGroup<CXX11Narrowing>, DefaultIgnore;
def warn_init_list_constant_narrowing : Warning<
"constant expression evaluates to %0 which cannot be narrowed to type %1 in "
"C++11">,
InGroup<CXX11Narrowing>, DefaultIgnore;
def note_init_list_narrowing_silence : Note<
"insert an explicit cast to silence this issue">;
def err_init_objc_class : Error<
"cannot initialize Objective-C class type %0">;
def err_implicit_empty_initializer : Error<
"initializer for aggregate with no elements requires explicit braces">;
def err_bitfield_has_negative_width : Error<
"bit-field %0 has negative width (%1)">;
def err_anon_bitfield_has_negative_width : Error<
"anonymous bit-field has negative width (%0)">;
def err_bitfield_has_zero_width : Error<"named bit-field %0 has zero width">;
def err_bitfield_width_exceeds_type_width : Error<
"width of%select{ anonymous|}0 bit-field%select{| %1}0 (%2 bits) exceeds the "
"%select{width|size}3 of its type (%4 bit%s4)">;
def err_incorrect_number_of_vector_initializers : Error<
"number of elements must be either one or match the size of the vector">;
// Used by C++ which allows bit-fields that are wider than the type.
def warn_bitfield_width_exceeds_type_width : Warning<
"width of bit-field %0 (%1 bits) exceeds the width of its type; value will "
"be truncated to %2 bit%s2">, InGroup<BitFieldWidth>;
def err_bitfield_too_wide : Error<
"%select{bit-field %1|anonymous bit-field}0 is too wide (%2 bits)">;
def warn_bitfield_too_small_for_enum : Warning<
"bit-field %0 is not wide enough to store all enumerators of %1">,
InGroup<BitFieldEnumConversion>, DefaultIgnore;
def note_widen_bitfield : Note<
"widen this field to %0 bits to store all values of %1">;
def warn_unsigned_bitfield_assigned_signed_enum : Warning<
"assigning value of signed enum type %1 to unsigned bit-field %0; "
"negative enumerators of enum %1 will be converted to positive values">,
InGroup<BitFieldEnumConversion>, DefaultIgnore;
def warn_signed_bitfield_enum_conversion : Warning<
"signed bit-field %0 needs an extra bit to represent the largest positive "
"enumerators of %1">,
InGroup<BitFieldEnumConversion>, DefaultIgnore;
def note_change_bitfield_sign : Note<
"consider making the bitfield type %select{unsigned|signed}0">;
def warn_missing_braces : Warning<
"suggest braces around initialization of subobject">,
InGroup<MissingBraces>, DefaultIgnore;
def err_redefinition_of_label : Error<"redefinition of label %0">;
def err_undeclared_label_use : Error<"use of undeclared label %0">;
def err_goto_ms_asm_label : Error<
"cannot jump from this goto statement to label %0 inside an inline assembly block">;
def note_goto_ms_asm_label : Note<
"inline assembly label %0 declared here">;
def warn_unused_label : Warning<"unused label %0">,
InGroup<UnusedLabel>, DefaultIgnore;
def err_continue_from_cond_var_init : Error<
"cannot jump from this continue statement to the loop increment; "
"jump bypasses initialization of loop condition variable">;
def err_goto_into_protected_scope : Error<
"cannot jump from this goto statement to its label">;
def ext_goto_into_protected_scope : ExtWarn<
"jump from this goto statement to its label is a Microsoft extension">,
InGroup<MicrosoftGoto>;
def warn_cxx98_compat_goto_into_protected_scope : Warning<
"jump from this goto statement to its label is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
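// Hedged C++ illustration:
//   void f(bool b) {
//     if (b) goto done; // cannot jump from this goto statement to its label
//     int x = 1;        // note: jump bypasses variable initialization
//   done:
//     (void)0;
//   }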
def err_switch_into_protected_scope : Error<
"cannot jump from switch statement to this case label">;
def warn_cxx98_compat_switch_into_protected_scope : Warning<
"jump from switch statement to this case label is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def err_indirect_goto_without_addrlabel : Error<
"indirect goto in function with no address-of-label expressions">;
def err_indirect_goto_in_protected_scope : Error<
"cannot jump from this %select{indirect|asm}0 goto statement to one of its possible targets">;
def warn_cxx98_compat_indirect_goto_in_protected_scope : Warning<
"jump from this %select{indirect|asm}0 goto statement to one of its possible targets "
"is incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
def note_indirect_goto_target : Note<
"possible target of %select{indirect|asm}0 goto statement">;
def note_protected_by_variable_init : Note<
"jump bypasses variable initialization">;
def note_protected_by_variable_nontriv_destructor : Note<
"jump bypasses variable with a non-trivial destructor">;
def note_protected_by_variable_non_pod : Note<
"jump bypasses initialization of non-POD variable">;
def note_protected_by_cleanup : Note<
"jump bypasses initialization of variable with __attribute__((cleanup))">;
def note_protected_by_vla_typedef : Note<
"jump bypasses initialization of VLA typedef">;
def note_protected_by_vla_type_alias : Note<
"jump bypasses initialization of VLA type alias">;
def note_protected_by_constexpr_if : Note<
"jump enters controlled statement of constexpr if">;
def note_protected_by_consteval_if : Note<
"jump enters controlled statement of consteval if">;
def note_protected_by_if_available : Note<
"jump enters controlled statement of if available">;
def note_protected_by_vla : Note<
"jump bypasses initialization of variable length array">;
def note_protected_by_objc_fast_enumeration : Note<
"jump enters Objective-C fast enumeration loop">;
def note_protected_by_objc_try : Note<
"jump bypasses initialization of @try block">;
def note_protected_by_objc_catch : Note<
"jump bypasses initialization of @catch block">;
def note_protected_by_objc_finally : Note<
"jump bypasses initialization of @finally block">;
def note_protected_by_objc_synchronized : Note<
"jump bypasses initialization of @synchronized block">;
def note_protected_by_objc_autoreleasepool : Note<
"jump bypasses auto release push of @autoreleasepool block">;
def note_protected_by_cxx_try : Note<
"jump bypasses initialization of try block">;
def note_protected_by_cxx_catch : Note<
"jump bypasses initialization of catch block">;
def note_protected_by_seh_try : Note<
"jump bypasses initialization of __try block">;
def note_protected_by_seh_except : Note<
"jump bypasses initialization of __except block">;
def note_protected_by_seh_finally : Note<
"jump bypasses initialization of __finally block">;
def note_protected_by___block : Note<
"jump bypasses setup of __block variable">;
def note_protected_by_objc_strong_init : Note<
"jump bypasses initialization of __strong variable">;
def note_protected_by_objc_weak_init : Note<
"jump bypasses initialization of __weak variable">;
def note_protected_by_non_trivial_c_struct_init : Note<
"jump bypasses initialization of variable of non-trivial C struct type">;
def note_enters_block_captures_cxx_obj : Note<
"jump enters lifetime of block which captures a destructible C++ object">;
def note_enters_block_captures_strong : Note<
"jump enters lifetime of block which strongly captures a variable">;
def note_enters_block_captures_weak : Note<
"jump enters lifetime of block which weakly captures a variable">;
def note_enters_block_captures_non_trivial_c_struct : Note<
"jump enters lifetime of block which captures a C struct that is non-trivial "
"to destroy">;
def note_enters_compound_literal_scope : Note<
"jump enters lifetime of a compound literal that is non-trivial to destruct">;
def note_exits_cleanup : Note<
"jump exits scope of variable with __attribute__((cleanup))">;
def note_exits_dtor : Note<
"jump exits scope of variable with non-trivial destructor">;
def note_exits_temporary_dtor : Note<
"jump exits scope of lifetime-extended temporary with non-trivial "
"destructor">;
def note_exits___block : Note<
"jump exits scope of __block variable">;
def note_exits_objc_try : Note<
"jump exits @try block">;
def note_exits_objc_catch : Note<
"jump exits @catch block">;
def note_exits_objc_finally : Note<
"jump exits @finally block">;
def note_exits_objc_synchronized : Note<
"jump exits @synchronized block">;
def note_exits_cxx_try : Note<
"jump exits try block">;
def note_exits_cxx_catch : Note<
"jump exits catch block">;
def note_exits_seh_try : Note<
"jump exits __try block">;
def note_exits_seh_except : Note<
"jump exits __except block">;
def note_exits_seh_finally : Note<
"jump exits __finally block">;
def note_exits_objc_autoreleasepool : Note<
"jump exits autoreleasepool block">;
def note_exits_objc_strong : Note<
"jump exits scope of __strong variable">;
def note_exits_objc_weak : Note<
"jump exits scope of __weak variable">;
def note_exits_block_captures_cxx_obj : Note<
"jump exits lifetime of block which captures a destructible C++ object">;
def note_exits_block_captures_strong : Note<
"jump exits lifetime of block which strongly captures a variable">;
def note_exits_block_captures_weak : Note<
"jump exits lifetime of block which weakly captures a variable">;
def note_exits_block_captures_non_trivial_c_struct : Note<
"jump exits lifetime of block which captures a C struct that is non-trivial "
"to destroy">;
def note_exits_compound_literal_scope : Note<
"jump exits lifetime of a compound literal that is non-trivial to destruct">;
def err_func_returning_qualified_void : ExtWarn<
"function cannot return qualified void type %0">,
InGroup<DiagGroup<"qualified-void-return-type">>;
def err_func_returning_array_function : Error<
"function cannot return %select{array|function}0 type %1">;
def err_field_declared_as_function : Error<"field %0 declared as a function">;
def err_field_incomplete_or_sizeless : Error<
"field has %select{incomplete|sizeless}0 type %1">;
def ext_variable_sized_type_in_struct : ExtWarn<
"field %0 with variable sized type %1 not at the end of a struct or class is"
" a GNU extension">, InGroup<GNUVariableSizedTypeNotAtEnd>;
def ext_c99_flexible_array_member : Extension<
"flexible array members are a C99 feature">, InGroup<C99>;
def err_flexible_array_virtual_base : Error<
"flexible array member %0 not allowed in "
"%select{struct|interface|union|class|enum}1 which has a virtual base class">;
def err_flexible_array_empty_aggregate : Error<
"flexible array member %0 not allowed in otherwise empty "
"%select{struct|interface|union|class|enum}1">;
def err_flexible_array_has_nontrivial_dtor : Error<
"flexible array member %0 of type %1 with non-trivial destruction">;
def ext_flexible_array_in_struct : Extension<
"%0 may not be nested in a struct due to flexible array member">,
InGroup<FlexibleArrayExtensions>;
def ext_flexible_array_in_array : Extension<
"%0 may not be used as an array element due to flexible array member">,
InGroup<FlexibleArrayExtensions>;
def err_flexible_array_init : Error<
"initialization of flexible array member is not allowed">;
def ext_flexible_array_empty_aggregate_ms : Extension<
"flexible array member %0 in otherwise empty "
"%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
InGroup<MicrosoftFlexibleArray>;
def err_flexible_array_union : Error<
"flexible array member %0 in a union is not allowed">;
def ext_flexible_array_union_ms : Extension<
"flexible array member %0 in a union is a Microsoft extension">,
InGroup<MicrosoftFlexibleArray>;
def ext_flexible_array_empty_aggregate_gnu : Extension<
"flexible array member %0 in otherwise empty "
"%select{struct|interface|union|class|enum}1 is a GNU extension">,
InGroup<GNUEmptyStruct>;
def ext_flexible_array_union_gnu : Extension<
  "flexible array member %0 in a union is a GNU extension">,
  InGroup<GNUFlexibleArrayUnionMember>;
def err_flexible_array_not_at_end : Error<
"flexible array member %0 with type %1 is not at the end of"
" %select{struct|interface|union|class|enum}2">;
def err_objc_variable_sized_type_not_at_end : Error<
"field %0 with variable sized type %1 is not at the end of class">;
def note_next_field_declaration : Note<
"next field declaration is here">;
def note_next_ivar_declaration : Note<
"next %select{instance variable declaration|synthesized instance variable}0"
" is here">;
def err_synthesize_variable_sized_ivar : Error<
"synthesized property with variable size type %0"
" requires an existing instance variable">;
def err_flexible_array_arc_retainable : Error<
"ARC forbids flexible array members with retainable object type">;
def warn_variable_sized_ivar_visibility : Warning<
"field %0 with variable sized type %1 is not visible to subclasses and"
" can conflict with their instance variables">, InGroup<ObjCFlexibleArray>;
def warn_superclass_variable_sized_type_not_at_end : Warning<
"field %0 can overwrite instance variable %1 with variable sized type %2"
" in superclass %3">, InGroup<ObjCFlexibleArray>;
let CategoryName = "ARC Semantic Issue" in {
// ARC-mode diagnostics.
let CategoryName = "ARC Weak References" in {
def err_arc_weak_no_runtime : Error<
"cannot create __weak reference because the current deployment target "
"does not support weak references">;
def err_arc_weak_disabled : Error<
"cannot create __weak reference in file using manual reference counting">;
def err_synthesizing_arc_weak_property_disabled : Error<
"cannot synthesize weak property in file using manual reference counting">;
def err_synthesizing_arc_weak_property_no_runtime : Error<
"cannot synthesize weak property because the current deployment target "
"does not support weak references">;
def err_arc_unsupported_weak_class : Error<
"class is incompatible with __weak references">;
def err_arc_weak_unavailable_assign : Error<
"assignment of a weak-unavailable object to a __weak object">;
def err_arc_weak_unavailable_property : Error<
"synthesizing __weak instance variable of type %0, which does not "
"support weak references">;
def note_implemented_by_class : Note<
"when implemented by class %0">;
def err_arc_conversion_of_weak_unavailable : Error<
"%select{implicit conversion|cast}0 of weak-unavailable object of type %1 to"
" a __weak object of type %2">;
} // end "ARC Weak References" category
let CategoryName = "ARC Restrictions" in {
def err_unavailable_in_arc : Error<
"%0 is unavailable in ARC">;
def note_arc_forbidden_type : Note<
"declaration uses type that is ill-formed in ARC">;
def note_performs_forbidden_arc_conversion : Note<
"inline function performs a conversion which is forbidden in ARC">;
def note_arc_init_returns_unrelated : Note<
"init method must return a type related to its receiver type">;
def note_arc_weak_disabled : Note<
"declaration uses __weak, but ARC is disabled">;
def note_arc_weak_no_runtime : Note<"declaration uses __weak, which "
"the current deployment target does not support">;
def note_arc_field_with_ownership : Note<
"field has non-trivial ownership qualification">;
def err_arc_illegal_explicit_message : Error<
"ARC forbids explicit message send of %0">;
def err_arc_unused_init_message : Error<
"the result of a delegate init call must be immediately returned "
"or assigned to 'self'">;
def err_arc_mismatched_cast : Error<
"%select{implicit conversion|cast}0 of "
"%select{%2|a non-Objective-C pointer type %2|a block pointer|"
"an Objective-C pointer|an indirect pointer to an Objective-C pointer}1"
" to %3 is disallowed with ARC">;
def err_arc_nolifetime_behavior : Error<
"explicit ownership qualifier on cast result has no effect">;
def err_arc_objc_property_default_assign_on_object : Error<
"ARC forbids synthesizing a property of an Objective-C object "
"with unspecified ownership or storage attribute">;
def err_arc_illegal_selector : Error<
"ARC forbids use of %0 in a @selector">;
def err_arc_illegal_method_def : Error<
"ARC forbids %select{implementation|synthesis}0 of %1">;
def warn_arc_strong_pointer_objc_pointer : Warning<
"method parameter of type %0 with no explicit ownership">,
InGroup<DiagGroup<"explicit-ownership-type">>, DefaultIgnore;
} // end "ARC Restrictions" category
def err_arc_lost_method_convention : Error<
"method was declared as %select{an 'alloc'|a 'copy'|an 'init'|a 'new'}0 "
"method, but its implementation doesn't match because %select{"
"its result type is not an object pointer|"
"its result type is unrelated to its receiver type}1">;
def note_arc_lost_method_convention : Note<"declaration in interface">;
def err_arc_gained_method_convention : Error<
"method implementation does not match its declaration">;
def note_arc_gained_method_convention : Note<
"declaration in interface is not in the '%select{alloc|copy|init|new}0' "
"family because %select{its result type is not an object pointer|"
"its result type is unrelated to its receiver type}1">;
def err_typecheck_arc_assign_self : Error<
"cannot assign to 'self' outside of a method in the init family">;
def err_typecheck_arc_assign_self_class_method : Error<
"cannot assign to 'self' in a class method">;
def err_typecheck_arr_assign_enumeration : Error<
"fast enumeration variables cannot be modified in ARC by default; "
"declare the variable __strong to allow this">;
def err_typecheck_arc_assign_externally_retained : Error<
"variable declared with 'objc_externally_retained' "
"cannot be modified in ARC">;
def warn_arc_retained_assign : Warning<
"assigning retained object to %select{weak|unsafe_unretained}0 "
"%select{property|variable}1"
"; object will be released after assignment">,
InGroup<ARCUnsafeRetainedAssign>;
def warn_arc_retained_property_assign : Warning<
"assigning retained object to unsafe property"
"; object will be released after assignment">,
InGroup<ARCUnsafeRetainedAssign>;
def warn_arc_literal_assign : Warning<
  "assigning %select{array literal|dictionary literal|numeric literal|boxed "
  "expression|<should not happen>|block literal}0"
" to a weak %select{property|variable}1"
"; object will be released after assignment">,
InGroup<ARCUnsafeRetainedAssign>;
def err_arc_new_array_without_ownership : Error<
"'new' cannot allocate an array of %0 with no explicit ownership">;
def err_arc_autoreleasing_var : Error<
"%select{__block variables|global variables|fields|instance variables}0 cannot have "
"__autoreleasing ownership">;
def err_arc_autoreleasing_capture : Error<
"cannot capture __autoreleasing variable in a "
"%select{block|lambda by copy}0">;
def err_arc_thread_ownership : Error<
"thread-local variable has non-trivial ownership: type is %0">;
def err_arc_indirect_no_ownership : Error<
"%select{pointer|reference}1 to non-const type %0 with no explicit ownership">;
def err_arc_array_param_no_ownership : Error<
"must explicitly describe intended ownership of an object array parameter">;
def err_arc_pseudo_dtor_inconstant_quals : Error<
"pseudo-destructor destroys object of type %0 with inconsistently-qualified "
"type %1">;
def err_arc_init_method_unrelated_result_type : Error<
"init methods must return a type related to the receiver type">;
def err_arc_nonlocal_writeback : Error<
"passing address of %select{non-local|non-scalar}0 object to "
"__autoreleasing parameter for write-back">;
def err_arc_method_not_found : Error<
"no known %select{instance|class}1 method for selector %0">;
def err_arc_receiver_forward_class : Error<
"receiver %0 for class message is a forward declaration">;
def err_arc_may_not_respond : Error<
"no visible @interface for %0 declares the selector %1">;
def err_arc_receiver_forward_instance : Error<
"receiver type %0 for instance message is a forward declaration">;
def warn_receiver_forward_instance : Warning<
"receiver type %0 for instance message is a forward declaration">,
InGroup<ForwardClassReceiver>, DefaultIgnore;
def err_arc_collection_forward : Error<
"collection expression type %0 is a forward declaration">;
def err_arc_multiple_method_decl : Error<
"multiple methods named %0 found with mismatched result, "
"parameter type or attributes">;
def warn_arc_lifetime_result_type : Warning<
"ARC %select{unused|__unsafe_unretained|__strong|__weak|__autoreleasing}0 "
"lifetime qualifier on return type is ignored">,
InGroup<IgnoredQualifiers>;
let CategoryName = "ARC Retain Cycle" in {
def warn_arc_retain_cycle : Warning<
"capturing %0 strongly in this block is likely to lead to a retain cycle">,
InGroup<ARCRetainCycles>;
def note_arc_retain_cycle_owner : Note<
"block will be retained by %select{the captured object|an object strongly "
"retained by the captured object}0">;
} // end "ARC Retain Cycle" category
def warn_arc_object_memaccess : Warning<
"%select{destination for|source of}0 this %1 call is a pointer to "
"ownership-qualified type %2">, InGroup<ARCNonPodMemAccess>;
let CategoryName = "ARC and @properties" in {
def err_arc_strong_property_ownership : Error<
"existing instance variable %1 for strong property %0 may not be "
"%select{|__unsafe_unretained||__weak}2">;
def err_arc_assign_property_ownership : Error<
"existing instance variable %1 for property %0 with %select{unsafe_unretained|assign}2 "
"attribute must be __unsafe_unretained">;
def err_arc_inconsistent_property_ownership : Error<
"%select{|unsafe_unretained|strong|weak}1 property %0 may not also be "
"declared %select{|__unsafe_unretained|__strong|__weak|__autoreleasing}2">;
} // end "ARC and @properties" category
def warn_block_capture_autoreleasing : Warning<
"block captures an autoreleasing out-parameter, which may result in "
"use-after-free bugs">,
InGroup<BlockCaptureAutoReleasing>;
def note_declare_parameter_strong : Note<
"declare the parameter __strong or capture a __block __strong variable to "
"keep values alive across autorelease pools">;
def err_arc_atomic_ownership : Error<
"cannot perform atomic operation on a pointer to type %0: type has "
"non-trivial ownership">;
let CategoryName = "ARC Casting Rules" in {
def err_arc_bridge_cast_incompatible : Error<
"incompatible types casting %0 to %1 with a %select{__bridge|"
"__bridge_transfer|__bridge_retained}2 cast">;
def err_arc_bridge_cast_wrong_kind : Error<
"cast of %select{Objective-C|block|C}0 pointer type %1 to "
"%select{Objective-C|block|C}2 pointer type %3 cannot use %select{__bridge|"
"__bridge_transfer|__bridge_retained}4">;
def err_arc_cast_requires_bridge : Error<
"%select{cast|implicit conversion}0 of %select{Objective-C|block|C}1 "
"pointer type %2 to %select{Objective-C|block|C}3 pointer type %4 "
"requires a bridged cast">;
def note_arc_bridge : Note<
"use __bridge to convert directly (no change in ownership)">;
def note_arc_cstyle_bridge : Note<
"use __bridge with C-style cast to convert directly (no change in ownership)">;
def note_arc_bridge_transfer : Note<
"use %select{__bridge_transfer|CFBridgingRelease call}1 to transfer "
"ownership of a +1 %0 into ARC">;
def note_arc_cstyle_bridge_transfer : Note<
"use __bridge_transfer with C-style cast to transfer "
"ownership of a +1 %0 into ARC">;
def note_arc_bridge_retained : Note<
"use %select{__bridge_retained|CFBridgingRetain call}1 to make an "
"ARC object available as a +1 %0">;
def note_arc_cstyle_bridge_retained : Note<
"use __bridge_retained with C-style cast to make an "
"ARC object available as a +1 %0">;
} // ARC Casting category
} // ARC category name
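// Illustrative sketch (not part of the diagnostic definitions above): the
// three bridged-cast spellings suggested by the notes in this category,
// in Objective-C under ARC with Core Foundation available:
//   CFStringRef cf = CFStringCreateWithCString(NULL, "x", kCFStringEncodingUTF8);
//   NSString *a = (__bridge NSString *)cf;            // no ownership change
//   NSString *b = (__bridge_transfer NSString *)cf;   // +1 CF ref moves into ARC
//   CFStringRef c = (__bridge_retained CFStringRef)a; // ARC object handed out as +1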
def err_flexible_array_init_needs_braces : Error<
"flexible array requires brace-enclosed initializer">;
def err_illegal_decl_array_of_functions : Error<
"'%0' declared as array of functions of type %1">;
def err_array_incomplete_or_sizeless_type : Error<
"array has %select{incomplete|sizeless}0 element type %1">;
def err_illegal_message_expr_incomplete_type : Error<
"Objective-C message has incomplete result type %0">;
def err_illegal_decl_array_of_references : Error<
"'%0' declared as array of references of type %1">;
def err_decl_negative_array_size : Error<
"'%0' declared as an array with a negative size">;
def err_array_static_outside_prototype : Error<
"%0 used in array declarator outside of function prototype">;
def err_array_static_not_outermost : Error<
"%0 used in non-outermost array type derivation">;
def err_array_star_outside_prototype : Error<
"star modifier used outside of function prototype">;
def err_illegal_decl_pointer_to_reference : Error<
"'%0' declared as a pointer to a reference of type %1">;
def err_illegal_decl_mempointer_to_reference : Error<
"'%0' declared as a member pointer to a reference of type %1">;
def err_illegal_decl_mempointer_to_void : Error<
"'%0' declared as a member pointer to void">;
def err_illegal_decl_mempointer_in_nonclass : Error<
"'%0' does not point into a class">;
def err_mempointer_in_nonclass_type : Error<
"member pointer refers into non-class type %0">;
def err_reference_to_void : Error<"cannot form a reference to 'void'">;
def err_nonfunction_block_type : Error<
"block pointer to non-function type is invalid">;
def err_return_block_has_expr : Error<"void block should not return a value">;
def err_block_return_missing_expr : Error<
"non-void block should return a value">;
def err_func_def_incomplete_result : Error<
"incomplete result type %0 in function definition">;
def err_atomic_specifier_bad_type
: Error<"_Atomic cannot be applied to "
"%select{incomplete |array |function |reference |atomic |qualified "
"|sizeless ||integer }0type "
"%1 %select{|||||||which is not trivially copyable|}0">;
def warn_atomic_member_access : Warning<
"accessing a member of an atomic structure or union is undefined behavior">,
InGroup<DiagGroup<"atomic-access">>, DefaultError;
// Expressions.
def ext_sizeof_alignof_function_type : Extension<
"invalid application of '%0' to a function type">, InGroup<PointerArith>;
def ext_sizeof_alignof_void_type : Extension<
"invalid application of '%0' to a void type">, InGroup<PointerArith>;
def err_opencl_sizeof_alignof_type : Error<
"invalid application of '%0' to a void type">;
def err_sizeof_alignof_incomplete_or_sizeless_type : Error<
"invalid application of '%0' to %select{an incomplete|sizeless}1 type %2">;
def err_sizeof_alignof_function_type : Error<
"invalid application of '%0' to a function type">;
def err_openmp_default_simd_align_expr : Error<
"invalid application of '__builtin_omp_required_simd_align' to an expression, only type is allowed">;
def err_sizeof_alignof_typeof_bitfield : Error<
"invalid application of '%select{sizeof|alignof|typeof|typeof_unqual}0' to "
"bit-field">;
def err_alignof_member_of_incomplete_type : Error<
"invalid application of 'alignof' to a field of a class still being defined">;
def err_vecstep_non_scalar_vector_type : Error<
"'vec_step' requires built-in scalar or vector type, %0 invalid">;
def err_offsetof_incomplete_type : Error<
"offsetof of incomplete type %0">;
def err_offsetof_record_type : Error<
"offsetof requires struct, union, or class type, %0 invalid">;
def err_offsetof_array_type : Error<"offsetof requires array type, %0 invalid">;
def ext_offsetof_non_pod_type : ExtWarn<"'offsetof' on non-POD type %0">,
InGroup<InvalidOffsetof>;
def ext_offsetof_non_standardlayout_type : ExtWarn<
"'offsetof' on non-standard-layout type %0">, InGroup<InvalidOffsetof>;
def err_offsetof_bitfield : Error<"cannot compute offset of bit-field %0">;
def err_offsetof_field_of_virtual_base : Error<
"invalid application of 'offsetof' to a field of a virtual base">;
def warn_sub_ptr_zero_size_types : Warning<
"subtraction of pointers to type %0 of zero size has undefined behavior">,
InGroup<PointerArith>;
def warn_pointer_arith_null_ptr : Warning<
"performing pointer arithmetic on a null pointer has undefined behavior%select{| if the offset is nonzero}0">,
InGroup<NullPointerArithmetic>, DefaultIgnore;
def warn_gnu_null_ptr_arith : Extension<
"arithmetic on a null pointer treated as a cast from integer to pointer is a GNU extension">,
InGroup<GNUNullPointerArithmetic>;
def warn_pointer_sub_null_ptr : Warning<
"performing pointer subtraction with a null pointer %select{has|may have}0 undefined behavior">,
InGroup<NullPointerSubtraction>, DefaultIgnore;
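// Illustrative sketch (not part of the diagnostic definitions above),
// assuming -Wnull-pointer-arithmetic is enabled (off by default):
//   char *p = (char *)0 + 4;   // warning: pointer arithmetic on a null
//                              // pointer has undefined behavior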
def warn_floatingpoint_eq : Warning<
"comparing floating point with == or != is unsafe">,
InGroup<DiagGroup<"float-equal">>, DefaultIgnore;
def err_setting_eval_method_used_in_unsafe_context : Error<
"%select{'#pragma clang fp eval_method'|option 'ffp-eval-method'}0 cannot be used with "
"%select{option 'fapprox-func'|option 'mreassociate'|option 'freciprocal'|option 'ffp-eval-method'|'#pragma clang fp reassociate'}1">;
def warn_remainder_division_by_zero : Warning<
"%select{remainder|division}0 by zero is undefined">,
InGroup<DivZero>;
def warn_shift_lhs_negative : Warning<"shifting a negative signed value is undefined">,
InGroup<DiagGroup<"shift-negative-value">>;
def warn_shift_negative : Warning<"shift count is negative">,
InGroup<DiagGroup<"shift-count-negative">>;
def warn_shift_gt_typewidth : Warning<"shift count >= width of type">,
InGroup<DiagGroup<"shift-count-overflow">>;
def warn_shift_result_gt_typewidth : Warning<
"signed shift result (%0) requires %1 bits to represent, but %2 only has "
"%3 bits">, InGroup<DiagGroup<"shift-overflow">>;
def warn_shift_result_sets_sign_bit : Warning<
"signed shift result (%0) sets the sign bit of the shift expression's "
"type (%1) and becomes negative">,
InGroup<DiagGroup<"shift-sign-overflow">>, DefaultIgnore;
def warn_precedence_bitwise_rel : Warning<
"%0 has lower precedence than %1; %1 will be evaluated first">,
InGroup<Parentheses>;
def note_precedence_bitwise_first : Note<
"place parentheses around the %0 expression to evaluate it first">;
def note_precedence_silence : Note<
"place parentheses around the '%0' expression to silence this warning">;
def warn_precedence_conditional : Warning<
"operator '?:' has lower precedence than '%0'; '%0' will be evaluated first">,
InGroup<Parentheses>;
def warn_precedence_bitwise_conditional : Warning<
"operator '?:' has lower precedence than '%0'; '%0' will be evaluated first">,
InGroup<BitwiseConditionalParentheses>;
def note_precedence_conditional_first : Note<
"place parentheses around the '?:' expression to evaluate it first">;
def warn_enum_constant_in_bool_context : Warning<
"converting the enum constant to a boolean">,
InGroup<IntInBoolContext>, DefaultIgnore;
def warn_left_shift_in_bool_context : Warning<
"converting the result of '<<' to a boolean; did you mean '(%0) != 0'?">,
InGroup<IntInBoolContext>, DefaultIgnore;
def warn_logical_instead_of_bitwise : Warning<
"use of logical '%0' with constant operand">,
InGroup<DiagGroup<"constant-logical-operand">>;
def note_logical_instead_of_bitwise_change_operator : Note<
"use '%0' for a bitwise operation">;
def note_logical_instead_of_bitwise_remove_constant : Note<
"remove constant to silence this warning">;
def warn_bitwise_op_in_bitwise_op : Warning<
"'%0' within '%1'">, InGroup<BitwiseOpParentheses>, DefaultIgnore;
def warn_logical_and_in_logical_or : Warning<
"'&&' within '||'">, InGroup<LogicalOpParentheses>, DefaultIgnore;
def warn_overloaded_shift_in_comparison : Warning<
"overloaded operator %select{>>|<<}0 has higher precedence than "
"comparison operator">,
InGroup<OverloadedShiftOpParentheses>;
def note_evaluate_comparison_first : Note<
"place parentheses around comparison expression to evaluate it first">;
def note_concatenated_string_literal_silence : Note<
"place parentheses around the string literal to silence this warning">;
def warn_addition_in_bitshift : Warning<
"operator '%0' has lower precedence than '%1'; "
"'%1' will be evaluated first">, InGroup<ShiftOpParentheses>;
def warn_self_assignment_builtin : Warning<
"explicitly assigning value of variable of type %0 to itself%select{|; did "
"you mean to assign to member %2?}1">,
InGroup<SelfAssignment>, DefaultIgnore;
def warn_self_assignment_overloaded : Warning<
"explicitly assigning value of variable of type %0 to itself%select{|; did "
"you mean to assign to member %2?}1">,
InGroup<SelfAssignmentOverloaded>, DefaultIgnore;
def warn_self_move : Warning<
"explicitly moving variable of type %0 to itself%select{|; did you mean to "
"move to member %2?}1">,
InGroup<SelfMove>, DefaultIgnore;
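// Illustrative sketch (not part of the diagnostic definitions above),
// assuming -Wself-move is enabled (off by default):
//   #include <utility>
//   #include <vector>
//   std::vector<int> v{1, 2, 3};
//   v = std::move(v);   // warning: explicitly moving variable to itself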
def err_builtin_move_forward_unsupported : Error<
"unsupported signature for %q0">;
def err_use_of_unaddressable_function : Error<
"taking address of non-addressable standard library function">;
// FIXME: This should also be in -Wc++23-compat once we have it.
def warn_cxx20_compat_use_of_unaddressable_function : Warning<
"taking address of non-addressable standard library function "
"is incompatible with C++20">, InGroup<CXX20Compat>;
def warn_redundant_move_on_return : Warning<
"redundant move in return statement">,
InGroup<RedundantMove>, DefaultIgnore;
def warn_pessimizing_move_on_return : Warning<
"moving a local object in a return statement prevents copy elision">,
InGroup<PessimizingMove>, DefaultIgnore;
def warn_pessimizing_move_on_initialization : Warning<
"moving a temporary object prevents copy elision">,
InGroup<PessimizingMove>, DefaultIgnore;
def note_remove_move : Note<"remove std::move call here">;
def warn_string_plus_int : Warning<
"adding %0 to a string does not append to the string">,
InGroup<StringPlusInt>;
def warn_string_plus_char : Warning<
"adding %0 to a string pointer does not append to the string">,
InGroup<StringPlusChar>;
def note_string_plus_scalar_silence : Note<
"use array indexing to silence this warning">;
def warn_sizeof_array_param : Warning<
"sizeof on array function parameter will return size of %0 instead of %1">,
InGroup<SizeofArrayArgument>;
def warn_sizeof_array_decay : Warning<
"sizeof on pointer operation will return size of %0 instead of %1">,
InGroup<SizeofArrayDecay>;
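// Illustrative sketch (not part of the diagnostic definitions above):
//   void f(int a[16]) {
//     unsigned long n = sizeof(a);   // warning: returns sizeof(int *),
//   }                                // not sizeof(int[16])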
def err_sizeof_nonfragile_interface : Error<
"application of '%select{alignof|sizeof}1' to interface %0 is "
"not supported on this architecture and platform">;
def err_atdef_nonfragile_interface : Error<
"use of @defs is not supported on this architecture and platform">;
def err_subscript_nonfragile_interface : Error<
"subscript requires size of interface %0, which is not constant for "
"this architecture and platform">;
def err_arithmetic_nonfragile_interface : Error<
"arithmetic on pointer to interface %0, which is not a constant size for "
"this architecture and platform">;
def warn_deprecated_comma_subscript : Warning<
"top-level comma expression in array subscript is deprecated "
"in C++20 and unsupported in C++2b">,
InGroup<DeprecatedCommaSubscript>;
def ext_subscript_non_lvalue : Extension<
"ISO C90 does not allow subscripting non-lvalue array">;
def err_typecheck_subscript_value : Error<
"subscripted value is not an array, pointer, or vector">;
def err_typecheck_subscript_not_integer : Error<
"array subscript is not an integer">;
def err_subscript_function_type : Error<
"subscript of pointer to function type %0">;
def err_subscript_incomplete_or_sizeless_type : Error<
"subscript of pointer to %select{incomplete|sizeless}0 type %1">;
def err_subscript_svbool_t : Error<
"subscript of svbool_t is not allowed">;
def err_dereference_incomplete_type : Error<
"dereference of pointer to incomplete type %0">;
def ext_gnu_subscript_void_type : Extension<
"subscript of a pointer to void is a GNU extension">,
InGroup<GNUPointerArith>;
def err_typecheck_member_reference_struct_union : Error<
"member reference base type %0 is not a structure or union">;
def err_typecheck_member_reference_ivar : Error<
"%0 does not have a member named %1">;
def err_arc_weak_ivar_access : Error<
"dereferencing a __weak pointer is not allowed due to a possible null "
"value caused by a race condition; assign it to a strong variable first">;
def err_typecheck_member_reference_arrow : Error<
"member reference type %0 is not a pointer">;
def err_typecheck_member_reference_suggestion : Error<
"member reference type %0 is %select{a|not a}1 pointer; did you mean to use '%select{->|.}1'?">;
def note_typecheck_member_reference_suggestion : Note<
"did you mean to use '.' instead?">;
def note_member_reference_arrow_from_operator_arrow : Note<
"'->' applied to return value of the operator->() declared here">;
def err_typecheck_member_reference_type : Error<
"cannot refer to type member %0 in %1 with '%select{.|->}2'">;
def err_typecheck_member_reference_unknown : Error<
"cannot refer to member %0 in %1 with '%select{.|->}2'">;
def err_member_reference_needs_call : Error<
"base of member reference is a function; perhaps you meant to call "
"it%select{| with no arguments}0?">;
def warn_subscript_is_char : Warning<"array subscript is of type 'char'">,
InGroup<CharSubscript>, DefaultIgnore;
def err_typecheck_incomplete_tag : Error<"incomplete definition of type %0">;
def err_no_member : Error<"no member named %0 in %1">;
def err_no_member_overloaded_arrow : Error<
"no member named %0 in %1; did you mean to use '->' instead of '.'?">;
def err_member_not_yet_instantiated : Error<
"no member %0 in %1; it has not yet been instantiated">;
def note_non_instantiated_member_here : Note<
"not-yet-instantiated member is declared here">;
def err_enumerator_does_not_exist : Error<
"enumerator %0 does not exist in instantiation of %1">;
def note_enum_specialized_here : Note<
"enum %0 was explicitly specialized here">;
def err_specialization_not_primary_template : Error<
"cannot reference member of primary template because deduced class "
"template specialization %0 is %select{instantiated from a partial|"
"an explicit}1 specialization">;
def err_member_redeclared : Error<"class member cannot be redeclared">;
def ext_member_redeclared : ExtWarn<"class member cannot be redeclared">,
InGroup<RedeclaredClassMember>;
def err_member_redeclared_in_instantiation : Error<
"multiple overloads of %0 instantiate to the same signature %1">;
def err_member_name_of_class : Error<"member %0 has the same name as its class">;
def err_member_def_undefined_record : Error<
"out-of-line definition of %0 from class %1 without definition">;
def err_member_decl_does_not_match : Error<
"out-of-line %select{declaration|definition}2 of %0 "
"does not match any declaration in %1">;
def err_friend_decl_with_def_arg_must_be_def : Error<
"friend declaration specifying a default argument must be a definition">;
def err_friend_decl_with_def_arg_redeclared : Error<
"friend declaration specifying a default argument must be the only declaration">;
def err_friend_decl_does_not_match : Error<
"friend declaration of %0 does not match any declaration in %1">;
def err_member_decl_does_not_match_suggest : Error<
"out-of-line %select{declaration|definition}2 of %0 "
"does not match any declaration in %1; did you mean %3?">;
def err_member_def_does_not_match_ret_type : Error<
"return type of out-of-line definition of %q0 differs from "
"that in the declaration">;
def err_nonstatic_member_out_of_line : Error<
"non-static data member defined out-of-line">;
def err_qualified_typedef_declarator : Error<
"typedef declarator cannot be qualified">;
def err_qualified_param_declarator : Error<
"parameter declarator cannot be qualified">;
def ext_out_of_line_declaration : ExtWarn<
"out-of-line declaration of a member must be a definition">,
InGroup<OutOfLineDeclaration>, DefaultError;
def err_member_extra_qualification : Error<
"extra qualification on member %0">;
def warn_member_extra_qualification : Warning<
err_member_extra_qualification.Summary>, InGroup<MicrosoftExtraQualification>;
def warn_namespace_member_extra_qualification : Warning<
"extra qualification on member %0">,
InGroup<DiagGroup<"extra-qualification">>;
def err_member_qualification : Error<
"non-friend class member %0 cannot have a qualified name">;
def note_member_def_close_match : Note<"member declaration nearly matches">;
def note_member_def_close_const_match : Note<
"member declaration does not match because "
"it %select{is|is not}0 const qualified">;
def note_member_def_close_param_match : Note<
"type of %ordinal0 parameter of member declaration does not match definition"
"%diff{ ($ vs $)|}1,2">;
def note_local_decl_close_match : Note<"local declaration nearly matches">;
def note_local_decl_close_param_match : Note<
"type of %ordinal0 parameter of local declaration does not match definition"
"%diff{ ($ vs $)|}1,2">;
def err_typecheck_ivar_variable_size : Error<
"instance variables must have a constant size">;
def err_ivar_reference_type : Error<
"instance variables cannot be of reference type">;
def err_typecheck_illegal_increment_decrement : Error<
"cannot %select{decrement|increment}1 value of type %0">;
def err_typecheck_expect_int : Error<
"used type %0 where integer is required">;
def err_typecheck_arithmetic_incomplete_or_sizeless_type : Error<
"arithmetic on a pointer to %select{an incomplete|sizeless}0 type %1">;
def err_typecheck_pointer_arith_function_type : Error<
"arithmetic on%select{ a|}0 pointer%select{|s}0 to%select{ the|}2 "
"function type%select{|s}2 %1%select{| and %3}2">;
def err_typecheck_pointer_arith_void_type : Error<
"arithmetic on%select{ a|}0 pointer%select{|s}0 to void">;
def err_typecheck_decl_incomplete_type : Error<
"variable has incomplete type %0">;
def ext_typecheck_decl_incomplete_type : ExtWarn<
"tentative definition of variable with internal linkage has incomplete non-array type %0">,
InGroup<DiagGroup<"tentative-definition-incomplete-type">>;
def err_tentative_def_incomplete_type : Error<
"tentative definition has type %0 that is never completed">;
def warn_tentative_incomplete_array : Warning<
"tentative array definition assumed to have one element">;
def err_typecheck_incomplete_array_needs_initializer : Error<
"definition of variable with array type needs an explicit size "
"or an initializer">;
def err_array_init_not_init_list : Error<
"array initializer must be an initializer "
"list%select{| or string literal| or wide string literal}0">;
def err_array_init_narrow_string_into_wchar : Error<
"initializing wide char array with non-wide string literal">;
def err_array_init_wide_string_into_char : Error<
"initializing char array with wide string literal">;
def err_array_init_incompat_wide_string_into_wchar : Error<
"initializing wide char array with incompatible wide string literal">;
def err_array_init_plain_string_into_char8_t : Error<
"initializing 'char8_t' array with plain string literal">;
def note_array_init_plain_string_into_char8_t : Note<
"add 'u8' prefix to form a 'char8_t' string literal">;
def err_array_init_utf8_string_into_char : Error<
"initialization of %select{|signed }0char array with "
"UTF-8 string literal is not permitted by %select{'-fchar8_t'|C++20}1">;
def warn_cxx20_compat_utf8_string : Warning<
"type of UTF-8 string literal will change from array of const char to "
"array of const char8_t in C++20">, InGroup<CXX20Compat>, DefaultIgnore;
def note_cxx20_compat_utf8_string_remove_u8 : Note<
"remove 'u8' prefix to avoid a change of behavior; "
"Clang encodes unprefixed narrow string literals as UTF-8">;
def err_array_init_different_type : Error<
"cannot initialize array %diff{of type $ with array of type $|"
"with different type of array}0,1">;
def err_array_init_non_constant_array : Error<
"cannot initialize array %diff{of type $ with non-constant array of type $|"
"with different type of array}0,1">;
def ext_array_init_copy : Extension<
"initialization of an array "
"%diff{of type $ from a compound literal of type $|"
"from a compound literal}0,1 is a GNU extension">, InGroup<GNUCompoundLiteralInitializer>;
// This is intentionally not disabled by -Wno-gnu.
def ext_array_init_parens : ExtWarn<
"parenthesized initialization of a member array is a GNU extension">,
InGroup<DiagGroup<"gnu-array-member-paren-init">>, DefaultError;
def warn_deprecated_string_literal_conversion : Warning<
"conversion from string literal to %0 is deprecated">,
InGroup<CXX11CompatDeprecatedWritableStr>;
def ext_deprecated_string_literal_conversion : ExtWarn<
"ISO C++11 does not allow conversion from string literal to %0">,
InGroup<WritableStrings>, SFINAEFailure;
def err_realimag_invalid_type : Error<"invalid type %0 to %1 operator">;
def err_typecheck_sclass_fscope : Error<
"illegal storage class on file-scoped variable">;
def warn_standalone_specifier : Warning<"'%0' ignored on this declaration">,
InGroup<MissingDeclarations>;
def ext_standalone_specifier : ExtWarn<"'%0' is not permitted on a declaration "
"of a type">, InGroup<MissingDeclarations>;
def err_standalone_class_nested_name_specifier : Error<
"forward declaration of %select{class|struct|interface|union|enum}0 cannot "
"have a nested name specifier">;
def err_typecheck_sclass_func : Error<"illegal storage class on function">;
def err_static_block_func : Error<
"function declared in block scope cannot have 'static' storage class">;
def err_typecheck_address_of : Error<"address of %select{bit-field"
"|vector element|property expression|register variable|matrix element}0 requested">;
def ext_typecheck_addrof_void : Extension<
"ISO C forbids taking the address of an expression of type 'void'">;
def err_unqualified_pointer_member_function : Error<
"must explicitly qualify name of member function when taking its address">;
def err_invalid_form_pointer_member_function : Error<
"cannot create a non-constant pointer to member function">;
def err_address_of_function_with_pass_object_size_params : Error<
"cannot take address of function %0 because parameter %1 has "
"pass_object_size attribute">;
def err_parens_pointer_member_function : Error<
"cannot parenthesize the name of a method when forming a member pointer">;
def err_typecheck_invalid_lvalue_addrof_addrof_function : Error<
"extra '&' taking address of overloaded function">;
def err_typecheck_invalid_lvalue_addrof : Error<
"cannot take the address of an rvalue of type %0">;
def ext_typecheck_addrof_temporary : ExtWarn<
"taking the address of a temporary object of type %0">,
InGroup<AddressOfTemporary>, DefaultError;
def err_typecheck_addrof_temporary : Error<
"taking the address of a temporary object of type %0">;
def err_typecheck_addrof_dtor : Error<
"taking the address of a destructor">;
def err_typecheck_unary_expr : Error<
"invalid argument type %0 to unary expression">;
def err_typecheck_indirection_requires_pointer : Error<
"indirection requires pointer operand (%0 invalid)">;
def ext_typecheck_indirection_through_void_pointer : ExtWarn<
"ISO C does not allow indirection on operand of type %0">,
InGroup<VoidPointerDeref>;
def ext_typecheck_indirection_through_void_pointer_cpp
: ExtWarn<"ISO C++ does not allow indirection on operand of type %0">,
InGroup<VoidPointerDeref>, DefaultError, SFINAEFailure;
def warn_indirection_through_null : Warning<
"indirection of non-volatile null pointer will be deleted, not trap">,
InGroup<NullDereference>;
def warn_binding_null_to_reference : Warning<
"binding dereferenced null pointer to reference has undefined behavior">,
InGroup<NullDereference>;
def note_indirection_through_null : Note<
"consider using __builtin_trap() or qualifying pointer with 'volatile'">;
def warn_pointer_indirection_from_incompatible_type : Warning<
"dereference of type %1 that was reinterpret_cast from type %0 has undefined "
"behavior">,
InGroup<UndefinedReinterpretCast>, DefaultIgnore;
def warn_taking_address_of_packed_member : Warning<
"taking address of packed member %0 of class or structure %q1 may result in an unaligned pointer value">,
InGroup<DiagGroup<"address-of-packed-member">>;
def warn_param_mismatched_alignment : Warning<
"passing %0-byte aligned argument to %1-byte aligned parameter %2%select{| of %4}3 may result in an unaligned pointer access">,
InGroup<DiagGroup<"align-mismatch">>;
def err_objc_object_assignment : Error<
"cannot assign to class object (%0 invalid)">;
def err_typecheck_invalid_operands : Error<
"invalid operands to binary expression (%0 and %1)">, Deferrable;
def note_typecheck_invalid_operands_converted : Note<
"%select{first|second}0 operand was implicitly converted to type %1">;
def err_typecheck_logical_vector_expr_gnu_cpp_restrict : Error<
"logical expression with vector %select{type %1 and non-vector type %2|types"
" %1 and %2}0 is only supported in C++">;
def err_typecheck_sub_ptr_compatible : Error<
"%diff{$ and $ are not pointers to compatible types|"
"pointers to incompatible types}0,1">;
def ext_typecheck_ordered_comparison_of_pointer_integer : ExtWarn<
"ordered comparison between pointer and integer (%0 and %1)">;
def ext_typecheck_ordered_comparison_of_pointer_and_zero : Extension<
"ordered comparison between pointer and zero (%0 and %1) is an extension">;
def err_typecheck_ordered_comparison_of_pointer_and_zero : Error<
"ordered comparison between pointer and zero (%0 and %1)">;
def err_typecheck_three_way_comparison_of_pointer_and_zero : Error<
"three-way comparison between pointer and zero">;
def ext_typecheck_compare_complete_incomplete_pointers : Extension<
"pointer comparisons before C11 "
"need to be between two complete or two incomplete types; "
"%0 is %select{|in}2complete and "
"%1 is %select{|in}3complete">,
InGroup<C11>;
def warn_typecheck_ordered_comparison_of_function_pointers : Warning<
"ordered comparison of function pointers (%0 and %1)">,
InGroup<OrderedCompareFunctionPointers>;
def ext_typecheck_ordered_comparison_of_function_pointers : ExtWarn<
"ordered comparison of function pointers (%0 and %1)">,
InGroup<OrderedCompareFunctionPointers>;
def err_typecheck_ordered_comparison_of_function_pointers : Error<
"ordered comparison of function pointers (%0 and %1)">;
def ext_typecheck_comparison_of_fptr_to_void : Extension<
"equality comparison between function pointer and void pointer (%0 and %1)">;
def err_typecheck_comparison_of_fptr_to_void : Error<
"equality comparison between function pointer and void pointer (%0 and %1)">;
def ext_typecheck_comparison_of_pointer_integer : ExtWarn<
"comparison between pointer and integer (%0 and %1)">,
InGroup<DiagGroup<"pointer-integer-compare">>;
def err_typecheck_comparison_of_pointer_integer : Error<
"comparison between pointer and integer (%0 and %1)">;
def ext_typecheck_comparison_of_distinct_pointers : ExtWarn<
"comparison of distinct pointer types%diff{ ($ and $)|}0,1">,
InGroup<CompareDistinctPointerType>;
def ext_typecheck_cond_incompatible_operands : ExtWarn<
"incompatible operand types (%0 and %1)">;
def err_cond_voidptr_arc : Error<
"operands to conditional of types%diff{ $ and $|}0,1 are incompatible "
"in ARC mode">;
def err_typecheck_comparison_of_distinct_pointers : Error<
"comparison of distinct pointer types%diff{ ($ and $)|}0,1">;
def err_typecheck_op_on_nonoverlapping_address_space_pointers : Error<
"%select{comparison between %diff{ ($ and $)|}0,1"
"|arithmetic operation with operands of type %diff{ ($ and $)|}0,1"
"|conditional operator with the second and third operands of type "
"%diff{ ($ and $)|}0,1}2"
" which are pointers to non-overlapping address spaces">;
def select_arith_conv_kind : TextSubstitution<
"%select{arithmetic between|bitwise operation between|comparison of|"
"conditional expression between|compound assignment of}0">;
def warn_arith_conv_enum_float : Warning<
"%sub{select_arith_conv_kind}0 "
"%select{floating-point|enumeration}1 type %2 "
"%plural{2:with|4:from|:and}0 "
"%select{enumeration|floating-point}1 type %3">,
InGroup<EnumFloatConversion>, DefaultIgnore;
def warn_arith_conv_enum_float_cxx20 : Warning<
"%sub{select_arith_conv_kind}0 "
"%select{floating-point|enumeration}1 type %2 "
"%plural{2:with|4:from|:and}0 "
"%select{enumeration|floating-point}1 type %3 is deprecated">,
InGroup<DeprecatedEnumFloatConversion>;
def warn_arith_conv_mixed_enum_types : Warning<
"%sub{select_arith_conv_kind}0 "
"different enumeration types%diff{ ($ and $)|}1,2">,
InGroup<EnumEnumConversion>, DefaultIgnore;
def warn_arith_conv_mixed_enum_types_cxx20 : Warning<
"%sub{select_arith_conv_kind}0 "
"different enumeration types%diff{ ($ and $)|}1,2 is deprecated">,
InGroup<DeprecatedEnumEnumConversion>;
def warn_arith_conv_mixed_anon_enum_types : Warning<
warn_arith_conv_mixed_enum_types.Summary>,
InGroup<AnonEnumEnumConversion>, DefaultIgnore;
def warn_arith_conv_mixed_anon_enum_types_cxx20 : Warning<
warn_arith_conv_mixed_enum_types_cxx20.Summary>,
InGroup<DeprecatedAnonEnumEnumConversion>;
def warn_conditional_mixed_enum_types : Warning<
warn_arith_conv_mixed_enum_types.Summary>,
InGroup<EnumCompareConditional>, DefaultIgnore;
def warn_conditional_mixed_enum_types_cxx20 : Warning<
warn_arith_conv_mixed_enum_types_cxx20.Summary>,
InGroup<DeprecatedEnumCompareConditional>;
def warn_comparison_mixed_enum_types : Warning<
warn_arith_conv_mixed_enum_types.Summary>,
InGroup<EnumCompare>;
def warn_comparison_mixed_enum_types_cxx20 : Warning<
warn_arith_conv_mixed_enum_types_cxx20.Summary>,
InGroup<DeprecatedEnumCompare>;
def warn_comparison_of_mixed_enum_types_switch : Warning<
"comparison of different enumeration types in switch statement"
"%diff{ ($ and $)|}0,1">,
InGroup<EnumCompareSwitch>;
def err_typecheck_assign_const : Error<
"%select{"
"cannot assign to return value because function %1 returns a const value|"
"cannot assign to variable %1 with const-qualified type %2|"
"cannot assign to %select{non-|}1static data member %2 "
"with const-qualified type %3|"
"cannot assign to non-static data member within const member function %1|"
"cannot assign to %select{variable %2|non-static data member %2|lvalue}1 "
"with %select{|nested }3const-qualified data member %4|"
"read-only variable is not assignable}0">;
def note_typecheck_assign_const : Note<
"%select{"
"function %1 which returns const-qualified type %2 declared here|"
"variable %1 declared const here|"
"%select{non-|}1static data member %2 declared const here|"
"member function %q1 is declared const here|"
"%select{|nested }1data member %2 declared const here}0">;
def warn_unsigned_always_true_comparison : Warning<
"result of comparison of %select{%3|unsigned expression}0 %2 "
"%select{unsigned expression|%3}0 is always %4">,
InGroup<TautologicalUnsignedZeroCompare>, DefaultIgnore;
def warn_unsigned_char_always_true_comparison : Warning<
"result of comparison of %select{%3|char expression}0 %2 "
"%select{char expression|%3}0 is always %4, since char is interpreted as "
"unsigned">, InGroup<TautologicalUnsignedCharZeroCompare>, DefaultIgnore;
def warn_unsigned_enum_always_true_comparison : Warning<
"result of comparison of %select{%3|unsigned enum expression}0 %2 "
"%select{unsigned enum expression|%3}0 is always %4">,
InGroup<TautologicalUnsignedEnumZeroCompare>, DefaultIgnore;
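// Illustrative sketch (not part of the diagnostic definitions above),
// assuming -Wtautological-unsigned-zero-compare is enabled (off by default):
//   unsigned u = 1;
//   if (u >= 0) return;   // warning: comparison is always true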
def warn_tautological_constant_compare : Warning<
"result of comparison %select{%3|%1}0 %2 "
"%select{%1|%3}0 is always %4">,
InGroup<TautologicalTypeLimitCompare>, DefaultIgnore;
def warn_tautological_compare_objc_bool : Warning<
"result of comparison of constant %0 with expression of type 'BOOL'"
" is always %1, as the only well defined values for 'BOOL' are YES and NO">,
InGroup<TautologicalObjCBoolCompare>;
def subst_int_range : TextSubstitution<"%0-bit %select{signed|unsigned}1 value">;
def warn_tautological_compare_value_range : Warning<
"result of comparison of "
"%select{%4|%sub{subst_int_range}1,2}0 %3 "
"%select{%sub{subst_int_range}1,2|%4}0 is always %5">,
InGroup<TautologicalValueRangeCompare>, DefaultIgnore;
def warn_mixed_sign_comparison : Warning<
"comparison of integers of different signs: %0 and %1">,
InGroup<SignCompare>, DefaultIgnore;
def warn_out_of_range_compare : Warning<
"result of comparison of %select{constant %0|true|false}1 with "
"%select{expression of type %2|boolean expression}3 is always %4">,
InGroup<TautologicalOutOfRangeCompare>;
def warn_tautological_bool_compare : Warning<warn_out_of_range_compare.Summary>,
InGroup<TautologicalConstantCompare>;
def warn_integer_constants_in_conditional_always_true : Warning<
"converting the result of '?:' with integer constants to a boolean always "
"evaluates to 'true'">,
InGroup<TautologicalConstantCompare>;
def warn_left_shift_always : Warning<
"converting the result of '<<' to a boolean always evaluates "
"to %select{false|true}0">,
InGroup<TautologicalConstantCompare>;
def warn_null_in_arithmetic_operation : Warning<
"use of NULL in arithmetic operation">,
InGroup<NullArithmetic>;
def warn_null_in_comparison_operation : Warning<
"comparison between NULL and non-pointer "
"%select{(%1 and NULL)|(NULL and %1)}0">,
InGroup<NullArithmetic>;
def err_shift_rhs_only_vector : Error<
"requested shift is a vector of type %0 but the first operand is not a "
"vector (%1)">;
def warn_logical_not_on_lhs_of_check : Warning<
"logical not is only applied to the left hand side of this "
"%select{comparison|bitwise operator}0">,
InGroup<LogicalNotParentheses>;
def note_logical_not_fix : Note<
"add parentheses after the '!' to evaluate the "
"%select{comparison|bitwise operator}0 first">;
def note_logical_not_silence_with_parens : Note<
"add parentheses around left hand side expression to silence this warning">;
def err_invalid_this_use : Error<
"invalid use of 'this' outside of a non-static member function">;
def err_this_static_member_func : Error<
"'this' cannot be%select{| implicitly}0 used in a static member function "
"declaration">;
def err_invalid_member_use_in_static_method : Error<
"invalid use of member %0 in static member function">;
def err_invalid_qualified_function_type : Error<
"%select{non-member function|static member function|deduction guide}0 "
"%select{of type %2 |}1cannot have '%3' qualifier">;
def err_compound_qualified_function_type : Error<
"%select{block pointer|pointer|reference}0 to function type %select{%2 |}1"
"cannot have '%3' qualifier">;
def err_qualified_function_typeid : Error<
"type operand %0 of 'typeid' cannot have '%1' qualifier">;
def err_ref_qualifier_overload : Error<
"cannot overload a member function %select{without a ref-qualifier|with "
"ref-qualifier '&'|with ref-qualifier '&&'}0 with a member function %select{"
"without a ref-qualifier|with ref-qualifier '&'|with ref-qualifier '&&'}1">;
def err_invalid_non_static_member_use : Error<
"invalid use of non-static data member %0">;
def err_nested_non_static_member_use : Error<
"%select{call to non-static member function|use of non-static data member}0 "
"%2 of %1 from nested type %3">;
def warn_cxx98_compat_non_static_member_use : Warning<
"use of non-static data member %0 in an unevaluated context is "
"incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
def err_invalid_incomplete_type_use : Error<
"invalid use of incomplete type %0">;
def err_builtin_func_cast_more_than_one_arg : Error<
"function-style cast to a builtin type can only take one argument">;
def err_value_init_for_array_type : Error<
"array types cannot be value-initialized">;
def err_init_for_function_type : Error<
"cannot create object of function type %0">;
def warn_format_nonliteral_noargs : Warning<
"format string is not a string literal (potentially insecure)">,
InGroup<FormatSecurity>;
def warn_format_nonliteral : Warning<
"format string is not a string literal">,
InGroup<FormatNonLiteral>, DefaultIgnore;
def err_unexpected_interface : Error<
"unexpected interface name %0: expected expression">;
def err_ref_non_value : Error<"%0 does not refer to a value">;
def err_ref_vm_type : Error<
"cannot refer to declaration with a variably modified type inside block">;
def err_ref_flexarray_type : Error<
"cannot refer to declaration of structure variable with flexible array member "
"inside block">;
def err_ref_array_type : Error<
"cannot refer to declaration with an array type inside block">;
def err_property_not_found : Error<
"property %0 not found on object of type %1">;
def err_invalid_property_name : Error<
"%0 is not a valid property name (accessing an object of type %1)">;
def err_getter_not_found : Error<
"no getter method for read from property">;
def err_objc_subscript_method_not_found : Error<
"expected method to %select{read|write}1 %select{dictionary|array}2 element not "
"found on object of type %0">;
def err_objc_subscript_index_type : Error<
"method index parameter type %0 is not an integral type">;
def err_objc_subscript_key_type : Error<
"method key parameter type %0 is not an object type">;
def err_objc_subscript_dic_object_type : Error<
"method object parameter type %0 is not an object type">;
def err_objc_subscript_object_type : Error<
"cannot assign to this %select{dictionary|array}1 because assigning method's "
"2nd parameter of type %0 is not an Objective-C pointer type">;
def err_objc_subscript_base_type : Error<
"%select{dictionary|array}1 subscript base type %0 is not an Objective-C object">;
def err_objc_multiple_subscript_type_conversion : Error<
"indexing expression is invalid because subscript type %0 has "
"multiple type conversion functions">;
def err_objc_subscript_type_conversion : Error<
"indexing expression is invalid because subscript type %0 is not an integral"
" or Objective-C pointer type">;
def err_objc_subscript_pointer : Error<
"indexing expression is invalid because subscript type %0 is not an"
" Objective-C pointer">;
def err_objc_indexing_method_result_type : Error<
"method for accessing %select{dictionary|array}1 element must have Objective-C"
" object return type instead of %0">;
def err_objc_index_incomplete_class_type : Error<
"Objective-C index expression has incomplete class type %0">;
def err_illegal_container_subscripting_op : Error<
"illegal operation on Objective-C container subscripting">;
def err_property_not_found_forward_class : Error<
"property %0 cannot be found in forward class object %1">;
def err_property_not_as_forward_class : Error<
"property %0 refers to an incomplete Objective-C class %1 "
"(with no @interface available)">;
def note_forward_class : Note<
"forward declaration of class here">;
def err_duplicate_property : Error<
"property has a previous declaration">;
def ext_gnu_void_ptr : Extension<
"arithmetic on%select{ a|}0 pointer%select{|s}0 to void is a GNU extension">,
InGroup<GNUPointerArith>;
def ext_gnu_ptr_func_arith : Extension<
"arithmetic on%select{ a|}0 pointer%select{|s}0 to%select{ the|}2 function "
"type%select{|s}2 %1%select{| and %3}2 is a GNU extension">,
InGroup<GNUPointerArith>;
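// Illustrative sketch (not part of the diagnostic definitions above):
//   void *p = 0;
//   p = p + 1;   // GNU extension: arithmetic on a pointer to void
//                // (diagnosed under -Wgnu-pointer-arith / -Wpedantic)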
def err_readonly_message_assignment : Error<
"assigning to 'readonly' return result of an Objective-C message is not allowed">;
def ext_integer_increment_complex : Extension<
"ISO C does not support '++'/'--' on complex integer type %0">;
def ext_integer_complement_complex : Extension<
"ISO C does not support '~' for complex conjugation of %0">;
def err_nosetter_property_assignment : Error<
"%select{assignment to readonly property|"
"no setter method %1 for assignment to property}0">;
def err_nosetter_property_incdec : Error<
"%select{%select{increment|decrement}1 of readonly property|"
"no setter method %2 for %select{increment|decrement}1 of property}0">;
def err_nogetter_property_compound_assignment : Error<
"a getter method is needed to perform a compound assignment on a property">;
def err_nogetter_property_incdec : Error<
"no getter method %1 for %select{increment|decrement}0 of property">;
def err_no_subobject_property_setting : Error<
"expression is not assignable">;
def err_qualified_objc_access : Error<
"%select{property|instance variable}0 access cannot be qualified with '%1'">;
def ext_freestanding_complex : Extension<
"complex numbers are an extension in a freestanding C99 implementation">;
// FIXME: Remove when we support imaginary.
def err_imaginary_not_supported : Error<"imaginary types are not supported">;
// Obj-c expressions
def warn_root_inst_method_not_found : Warning<
"instance method %0 is being used on 'Class' which is not in the root class">,
InGroup<MethodAccess>;
def warn_class_method_not_found : Warning<
"class method %objcclass0 not found (return type defaults to 'id')">,
InGroup<MethodAccess>;
def warn_instance_method_on_class_found : Warning<
"instance method %0 found instead of class method %1">,
InGroup<MethodAccess>;
def warn_inst_method_not_found : Warning<
"instance method %objcinstance0 not found (return type defaults to 'id')">,
InGroup<MethodAccess>;
def warn_instance_method_not_found_with_typo : Warning<
"instance method %objcinstance0 not found (return type defaults to 'id')"
"; did you mean %objcinstance2?">, InGroup<MethodAccess>;
def warn_class_method_not_found_with_typo : Warning<
"class method %objcclass0 not found (return type defaults to 'id')"
"; did you mean %objcclass2?">, InGroup<MethodAccess>;
def err_method_not_found_with_typo : Error<
"%select{instance|class}1 method %0 not found "
"; did you mean %2?">;
def err_no_super_class_message : Error<
"no @interface declaration found in class messaging of %0">;
def err_root_class_cannot_use_super : Error<
"%0 cannot use 'super' because it is a root class">;
def err_invalid_receiver_to_message_super : Error<
"'super' is only valid in a method body">;
def err_invalid_receiver_class_message : Error<
"receiver type %0 is not an Objective-C class">;
def err_missing_open_square_message_send : Error<
"missing '[' at start of message send expression">;
def warn_bad_receiver_type : Warning<
"receiver type %0 is not 'id' or interface pointer; consider "
"casting it to 'id'">, InGroup<ObjCReceiver>;
def err_bad_receiver_type : Error<"bad receiver type %0">;
def err_incomplete_receiver_type : Error<"incomplete receiver type %0">;
def err_unknown_receiver_suggest : Error<
"unknown receiver %0; did you mean %1?">;
def err_objc_throw_expects_object : Error<
"@throw requires an Objective-C object type (%0 invalid)">;
def err_objc_synchronized_expects_object : Error<
"@synchronized requires an Objective-C object type (%0 invalid)">;
def err_rethrow_used_outside_catch : Error<
"@throw (rethrow) used outside of a @catch block">;
def err_attribute_multiple_objc_gc : Error<
"multiple garbage collection attributes specified for type">;
def err_catch_param_not_objc_type : Error<
"@catch parameter is not a pointer to an interface type">;
def err_illegal_qualifiers_on_catch_parm : Error<
"illegal qualifiers on @catch parameter">;
def err_storage_spec_on_catch_parm : Error<
"@catch parameter cannot have storage specifier '%0'">;
def warn_register_objc_catch_parm : Warning<
"'register' storage specifier on @catch parameter will be ignored">;
def err_qualified_objc_catch_parm : Error<
"@catch parameter declarator cannot be qualified">;
def warn_objc_pointer_cxx_catch_fragile : Warning<
"cannot catch an exception thrown with @throw in C++ in the non-unified "
"exception model">, InGroup<ObjCNonUnifiedException>;
def err_objc_object_catch : Error<
"cannot catch an Objective-C object by value">;
def err_incomplete_type_objc_at_encode : Error<
"'@encode' of incomplete type %0">;
def warn_objc_circular_container : Warning<
"adding %0 to %1 might cause circular dependency in container">,
InGroup<DiagGroup<"objc-circular-container">>;
def note_objc_circular_container_declared_here : Note<"%0 declared here">;
def warn_objc_unsafe_perform_selector : Warning<
"%0 is incompatible with selectors that return a "
"%select{struct|union|vector}1 type">,
InGroup<DiagGroup<"objc-unsafe-perform-selector">>;
def note_objc_unsafe_perform_selector_method_declared_here : Note<
"method %0 that returns %1 declared here">;
def err_attribute_arm_builtin_alias : Error<
"'__clang_arm_builtin_alias' attribute can only be applied to an ARM builtin">;
def err_attribute_arm_mve_polymorphism : Error<
"'__clang_arm_mve_strict_polymorphism' attribute can only be applied to an MVE/NEON vector type">;
def warn_setter_getter_impl_required : Warning<
"property %0 requires method %1 to be defined - "
"use @synthesize, @dynamic or provide a method implementation "
"in this class implementation">,
InGroup<ObjCPropertyImpl>;
def warn_setter_getter_impl_required_in_category : Warning<
"property %0 requires method %1 to be defined - "
"use @dynamic or provide a method implementation in this category">,
InGroup<ObjCPropertyImpl>;
def note_parameter_named_here : Note<
"passing argument to parameter %0 here">;
def note_parameter_here : Note<
"passing argument to parameter here">;
def note_method_return_type_change : Note<
"compiler has implicitly changed method %0 return type">;
def warn_impl_required_for_class_property : Warning<
"class property %0 requires method %1 to be defined - "
"use @dynamic or provide a method implementation "
"in this class implementation">,
InGroup<ObjCPropertyImpl>;
def warn_impl_required_in_category_for_class_property : Warning<
"class property %0 requires method %1 to be defined - "
"use @dynamic or provide a method implementation in this category">,
InGroup<ObjCPropertyImpl>;
// C++ casts
// These messages adhere to the TryCast pattern: %0 is an int specifying the
// cast type, %1 is the source type, %2 is the destination type.
def err_bad_reinterpret_cast_overload : Error<
"reinterpret_cast cannot resolve overloaded function %0 to type %1">;
def warn_reinterpret_different_from_static : Warning<
"'reinterpret_cast' %select{from|to}3 class %0 %select{to|from}3 its "
"%select{virtual base|base at non-zero offset}2 %1 behaves differently from "
"'static_cast'">, InGroup<ReinterpretBaseClass>;
def note_reinterpret_updowncast_use_static : Note<
"use 'static_cast' to adjust the pointer correctly while "
"%select{upcasting|downcasting}0">;
def err_bad_static_cast_overload : Error<
"address of overloaded function %0 cannot be static_cast to type %1">;
def err_bad_cstyle_cast_overload : Error<
"address of overloaded function %0 cannot be cast to type %1">;
def err_bad_cxx_cast_generic : Error<
"%select{const_cast|static_cast|reinterpret_cast|dynamic_cast|"
"C-style cast|functional-style cast|addrspace_cast}0 from %1 to %2 is not allowed">;
def err_bad_cxx_cast_unrelated_class : Error<
"%select{const_cast|static_cast|reinterpret_cast|dynamic_cast|C-style cast|"
"functional-style cast|}0 from %1 to %2, which are not related by "
"inheritance, is not allowed">;
def note_type_incomplete : Note<"%0 is incomplete">;
def err_bad_cxx_cast_rvalue : Error<
"%select{const_cast|static_cast|reinterpret_cast|dynamic_cast|C-style cast|"
"functional-style cast|addrspace_cast}0 from rvalue to reference type %2">;
def err_bad_cxx_cast_bitfield : Error<
"%select{const_cast|static_cast|reinterpret_cast|dynamic_cast|C-style cast|"
"functional-style cast|}0 from bit-field lvalue to reference type %2">;
def err_bad_cxx_cast_qualifiers_away : Error<
"%select{const_cast|static_cast|reinterpret_cast|dynamic_cast|C-style cast|"
"functional-style cast|}0 from %1 to %2 casts away qualifiers">;
def err_bad_cxx_cast_addr_space_mismatch : Error<
"%select{const_cast|static_cast|reinterpret_cast|dynamic_cast|"
"C-style cast|functional-style cast|addrspace_cast}0 from %1 to %2 converts between mismatching address"
" spaces">;
def ext_bad_cxx_cast_qualifiers_away_incoherent : ExtWarn<
"ISO C++ does not allow "
"%select{const_cast|static_cast|reinterpret_cast|dynamic_cast|C-style cast|"
"functional-style cast|}0 from %1 to %2 because it casts away qualifiers, "
"even though the source and destination types are unrelated">,
SFINAEFailure, InGroup<DiagGroup<"cast-qual-unrelated">>;
def err_bad_const_cast_dest : Error<
"%select{const_cast||||C-style cast|functional-style cast|}0 to %2, "
"which is not a reference, pointer-to-object, or pointer-to-data-member">;
def ext_cast_fn_obj : Extension<
"cast between pointer-to-function and pointer-to-object is an extension">;
def ext_ms_cast_fn_obj : ExtWarn<
"static_cast between pointer-to-function and pointer-to-object is a "
"Microsoft extension">, InGroup<MicrosoftCast>;
def warn_cxx98_compat_cast_fn_obj : Warning<
"cast between pointer-to-function and pointer-to-object is incompatible with C++98">,
InGroup<CXX98CompatPedantic>, DefaultIgnore;
def err_bad_reinterpret_cast_small_int : Error<
"cast from pointer to smaller type %2 loses information">;
def err_bad_cxx_cast_vector_to_scalar_different_size : Error<
"%select{||reinterpret_cast||C-style cast||}0 from vector %1 "
"to scalar %2 of different size">;
def err_bad_cxx_cast_scalar_to_vector_different_size : Error<
"%select{||reinterpret_cast||C-style cast||}0 from scalar %1 "
"to vector %2 of different size">;
def err_bad_cxx_cast_vector_to_vector_different_size : Error<
"%select{||reinterpret_cast||C-style cast||}0 from vector %1 "
"to vector %2 of different size">;
def warn_bad_cxx_cast_nested_pointer_addr_space : Warning<
"%select{reinterpret_cast|C-style cast}0 from %1 to %2 "
"changes address space of nested pointers">,
InGroup<IncompatiblePointerTypesDiscardsQualifiers>;
def err_bad_lvalue_to_rvalue_cast : Error<
"cannot cast from lvalue of type %1 to rvalue reference type %2; types are "
"not compatible">;
def err_bad_rvalue_to_rvalue_cast : Error<
"cannot cast from rvalue of type %1 to rvalue reference type %2; types are "
"not compatible">;
def err_bad_static_cast_pointer_nonpointer : Error<
"cannot cast from type %1 to pointer type %2">;
def err_bad_static_cast_member_pointer_nonmp : Error<
"cannot cast from type %1 to member pointer type %2">;
def err_bad_cxx_cast_member_pointer_size : Error<
"cannot %select{||reinterpret_cast||C-style cast||}0 from member pointer "
"type %1 to member pointer type %2 of different size">;
def err_bad_reinterpret_cast_reference : Error<
"reinterpret_cast of a %0 to %1 needs its address, which is not allowed">;
def warn_undefined_reinterpret_cast : Warning<
"reinterpret_cast from %0 to %1 has undefined behavior">,
InGroup<UndefinedReinterpretCast>, DefaultIgnore;
// These messages don't adhere to the pattern.
// FIXME: Display the path somehow better.
def err_ambiguous_base_to_derived_cast : Error<
"ambiguous cast from base %0 to derived %1:%2">;
def err_static_downcast_via_virtual : Error<
"cannot cast %0 to %1 via virtual base %2">;
def err_downcast_from_inaccessible_base : Error<
"cannot cast %select{private|protected}2 base class %1 to %0">;
def err_upcast_to_inaccessible_base : Error<
"cannot cast %0 to its %select{private|protected}2 base class %1">;
def err_bad_dynamic_cast_not_ref_or_ptr : Error<
"invalid target type %0 for dynamic_cast; target type must be a reference or pointer type to a defined class">;
def err_bad_dynamic_cast_not_class : Error<"%0 is not a class type">;
def err_bad_cast_incomplete : Error<"%0 is an incomplete type">;
def err_bad_dynamic_cast_not_ptr : Error<"cannot use dynamic_cast to convert from %0 to %1">;
def err_bad_dynamic_cast_not_polymorphic : Error<"%0 is not polymorphic">;
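// Illustrative sketch (not part of the diagnostic definitions above):
//   struct B { };                      // no virtual functions
//   struct D : B { };
//   D *d = dynamic_cast<D *>(new B);   // error: 'B' is not polymorphic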
// Other C++ expressions
def err_need_header_before_typeid : Error<
"you need to include <typeinfo> before using the 'typeid' operator">;
def err_need_header_before_placement_new : Error<
"no matching %0 function for non-allocating placement new expression; "
"include <new>">;
def err_ms___leave_not_in___try : Error<
"'__leave' statement not in __try block">;
def err_uuidof_without_guid : Error<
"cannot call operator __uuidof on a type with no GUID">;
def err_uuidof_with_multiple_guids : Error<
"cannot call operator __uuidof on a type with multiple GUIDs">;
def err_incomplete_typeid : Error<"'typeid' of incomplete type %0">;
def err_variably_modified_typeid : Error<"'typeid' of variably modified type %0">;
def err_static_illegal_in_new : Error<
"the 'static' modifier for the array size is not legal in new expressions">;
def err_array_new_needs_size : Error<
"array size must be specified in new expression with no initializer">;
def err_bad_new_type : Error<
"cannot allocate %select{function|reference}1 type %0 with new">;
def err_new_incomplete_or_sizeless_type : Error<
"allocation of %select{incomplete|sizeless}0 type %1">;
def err_new_array_nonconst : Error<
"only the first dimension of an allocated array may have dynamic size">;
def err_new_array_size_unknown_from_init : Error<
"cannot determine allocated array size from initializer">;
def err_new_array_init_args : Error<
"array 'new' cannot have initialization arguments">;
def ext_new_paren_array_nonconst : ExtWarn<
"when type is in parentheses, array cannot have dynamic size">;
def err_placement_new_non_placement_delete : Error<
"'new' expression with placement arguments refers to non-placement "
"'operator delete'">;
def err_array_size_not_integral : Error<
"array size expression must have integral or %select{|unscoped }0"
"enumeration type, not %1">;
def err_array_size_incomplete_type : Error<
"array size expression has incomplete class type %0">;
def err_array_size_explicit_conversion : Error<
"array size expression of type %0 requires explicit conversion to type %1">;
def note_array_size_conversion : Note<
"conversion to %select{integral|enumeration}0 type %1 declared here">;
def err_array_size_ambiguous_conversion : Error<
"ambiguous conversion of array size expression of type %0 to an integral or "
"enumeration type">;
def ext_array_size_conversion : Extension<
"implicit conversion from array size expression of type %0 to "
"%select{integral|enumeration}1 type %2 is a C++11 extension">,
InGroup<CXX11>;
def warn_cxx98_compat_array_size_conversion : Warning<
"implicit conversion from array size expression of type %0 to "
"%select{integral|enumeration}1 type %2 is incompatible with C++98">,
InGroup<CXX98CompatPedantic>, DefaultIgnore;
def err_address_space_qualified_new : Error<
"'new' cannot allocate objects of type %0 in address space '%1'">;
def err_address_space_qualified_delete : Error<
"'delete' cannot delete objects of type %0 in address space '%1'">;
def err_default_init_const : Error<
"default initialization of an object of const type %0"
"%select{| without a user-provided default constructor}1">;
def ext_default_init_const : ExtWarn<
"default initialization of an object of const type %0"
"%select{| without a user-provided default constructor}1 "
"is a Microsoft extension">,
InGroup<MicrosoftConstInit>;
def err_delete_operand : Error<"cannot delete expression of type %0">;
def ext_delete_void_ptr_operand : ExtWarn<
"cannot delete expression with pointer-to-'void' type %0">,
InGroup<DeleteIncomplete>;
def err_ambiguous_delete_operand : Error<
"ambiguous conversion of delete expression of type %0 to a pointer">;
def warn_delete_incomplete : Warning<
"deleting pointer to incomplete type %0 may cause undefined behavior">,
InGroup<DeleteIncomplete>;
def err_delete_incomplete_class_type : Error<
"deleting incomplete class type %0; no conversions to pointer type">;
def err_delete_explicit_conversion : Error<
"converting delete expression from type %0 to type %1 invokes an explicit "
"conversion function">;
def note_delete_conversion : Note<"conversion to pointer type %0">;
def warn_delete_array_type : Warning<
"'delete' applied to a pointer-to-array type %0 treated as 'delete[]'">;
def warn_mismatched_delete_new : Warning<
"'delete%select{|[]}0' applied to a pointer that was allocated with "
"'new%select{[]|}0'; did you mean 'delete%select{[]|}0'?">,
InGroup<DiagGroup<"mismatched-new-delete">>;
def note_allocated_here : Note<"allocated with 'new%select{[]|}0' here">;
def err_no_suitable_delete_member_function_found : Error<
"no suitable member %0 in %1">;
def err_ambiguous_suitable_delete_member_function_found : Error<
"multiple suitable %0 functions in %1">;
def warn_ambiguous_suitable_delete_function_found : Warning<
"multiple suitable %0 functions for %1; no 'operator delete' function "
"will be invoked if initialization throws an exception">,
InGroup<DiagGroup<"ambiguous-delete">>;
def note_member_declared_here : Note<
"member %0 declared here">;
def note_member_first_declared_here : Note<
"member %0 first declared here">;
def warn_bitwise_instead_of_logical : Warning<
"use of bitwise '%0' with boolean operands">,
InGroup<BitwiseInsteadOfLogical>, DefaultIgnore;
def warn_bitwise_negation_bool : Warning<
"bitwise negation of a boolean expression%select{;| always evaluates to 'true';}0 "
"did you mean logical negation?">,
InGroup<BoolOperation>, DefaultIgnore;
def err_decrement_bool : Error<"cannot decrement expression of type bool">;
def warn_increment_bool : Warning<
"incrementing expression of type bool is deprecated and "
"incompatible with C++17">, InGroup<DeprecatedIncrementBool>;
def ext_increment_bool : ExtWarn<
"ISO C++17 does not allow incrementing expression of type bool">,
DefaultError, InGroup<IncrementBool>;
def err_increment_decrement_enum : Error<
"cannot %select{decrement|increment}0 expression of enum type %1">;
def warn_deprecated_increment_decrement_volatile : Warning<
"%select{decrement|increment}0 of object of volatile-qualified type %1 "
"is deprecated">, InGroup<DeprecatedVolatile>;
def warn_deprecated_simple_assign_volatile : Warning<
"use of result of assignment to object of volatile-qualified type %0 "
"is deprecated">, InGroup<DeprecatedVolatile>;
def warn_deprecated_volatile_return : Warning<
"volatile-qualified return type %0 is deprecated">,
InGroup<DeprecatedVolatile>;
def warn_deprecated_volatile_param : Warning<
"volatile-qualified parameter type %0 is deprecated">,
InGroup<DeprecatedVolatile>;
def warn_deprecated_volatile_structured_binding : Warning<
"volatile qualifier in structured binding declaration is deprecated">,
InGroup<DeprecatedVolatile>;
def warn_deprecated_altivec_src_compat : Warning<
  "current handling of vector bool and vector pixel types in this context is "
  "deprecated; the default behavior will soon change to that implied by the "
  "'-altivec-compat=xl' option">,
  InGroup<DiagGroup<"deprecated-altivec-src-compat">>;
def warn_deprecated_lax_vec_conv_all : Warning<
  "implicit conversion between vector types ('%0' and '%1') is deprecated; "
  "in the future, the behavior implied by '-fno-lax-vector-conversions' "
  "will be the default">,
  InGroup<DiagGroup<"deprecate-lax-vec-conv-all">>;
def err_catch_incomplete_ptr : Error<
"cannot catch pointer to incomplete type %0">;
def err_catch_incomplete_ref : Error<
"cannot catch reference to incomplete type %0">;
def err_catch_incomplete : Error<"cannot catch incomplete type %0">;
def err_catch_sizeless : Error<
"cannot catch %select{|reference to }0sizeless type %1">;
def err_catch_rvalue_ref : Error<"cannot catch exceptions by rvalue reference">;
def err_catch_variably_modified : Error<
"cannot catch variably modified type %0">;
def err_qualified_catch_declarator : Error<
"exception declarator cannot be qualified">;
def err_early_catch_all : Error<"catch-all handler must come last">;
def err_bad_memptr_rhs : Error<
"right hand operand to %0 has non-pointer-to-member type %1">;
def err_bad_memptr_lhs : Error<
"left hand operand to %0 must be a %select{|pointer to }1class "
"compatible with the right hand operand, but is %2">;
def err_memptr_incomplete : Error<
"member pointer has incomplete base type %0">;
def warn_exception_caught_by_earlier_handler : Warning<
"exception of type %0 will be caught by earlier handler">,
InGroup<Exceptions>;
def note_previous_exception_handler : Note<"for type %0">;
def err_exceptions_disabled : Error<
"cannot use '%0' with exceptions disabled">;
def err_objc_exceptions_disabled : Error<
"cannot use '%0' with Objective-C exceptions disabled">;
def warn_throw_in_noexcept_func : Warning<
"%0 has a non-throwing exception specification but can still throw">,
InGroup<Exceptions>;
def note_throw_in_dtor : Note<
"%select{destructor|deallocator}0 has a %select{non-throwing|implicit "
"non-throwing}1 exception specification">;
def note_throw_in_function : Note<"function declared non-throwing here">;
def err_seh_try_outside_functions : Error<
"cannot use SEH '__try' in blocks, captured regions, or Obj-C method decls">;
def err_mixing_cxx_try_seh_try : Error<
"cannot use %select{C++ 'try'|Objective-C '@try'}0 "
"in the same function as SEH '__try'">;
def err_seh_try_unsupported : Error<
"SEH '__try' is not supported on this target">;
def note_conflicting_try_here : Note<
"conflicting %0 here">;
def warn_jump_out_of_seh_finally : Warning<
"jump out of __finally block has undefined behavior">,
InGroup<DiagGroup<"jump-seh-finally">>;
def warn_non_virtual_dtor : Warning<
"%0 has virtual functions but non-virtual destructor">,
InGroup<NonVirtualDtor>, DefaultIgnore;
def warn_delete_non_virtual_dtor : Warning<
"%select{delete|destructor}0 called on non-final %1 that has "
"virtual functions but non-virtual destructor">,
InGroup<DeleteNonAbstractNonVirtualDtor>, DefaultIgnore, ShowInSystemHeader;
def note_delete_non_virtual : Note<
"qualify call to silence this warning">;
def warn_delete_abstract_non_virtual_dtor : Warning<
"%select{delete|destructor}0 called on %1 that is abstract but has "
"non-virtual destructor">, InGroup<DeleteAbstractNonVirtualDtor>, ShowInSystemHeader;
def warn_overloaded_virtual : Warning<
"%q0 hides overloaded virtual %select{function|functions}1">,
InGroup<OverloadedVirtual>, DefaultIgnore;
def note_hidden_overloaded_virtual_declared_here : Note<
"hidden overloaded virtual function %q0 declared here"
"%select{|: different classes%diff{ ($ vs $)|}2,3"
"|: different number of parameters (%2 vs %3)"
"|: type mismatch at %ordinal2 parameter%diff{ ($ vs $)|}3,4"
"|: different return type%diff{ ($ vs $)|}2,3"
"|: different qualifiers (%2 vs %3)"
"|: different exception specifications}1">;
def warn_using_directive_in_header : Warning<
"using namespace directive in global context in header">,
InGroup<HeaderHygiene>, DefaultIgnore;
def warn_overaligned_type : Warning<
"type %0 requires %1 bytes of alignment and the default allocator only "
"guarantees %2 bytes">,
InGroup<OveralignedType>, DefaultIgnore;
def err_array_element_alignment : Error<
  "size of array element of type %0 (%1 bytes) isn't a multiple of its "
  "alignment (%2 bytes)">;
def err_aligned_allocation_unavailable : Error<
"aligned %select{allocation|deallocation}0 function of type '%1' is "
"%select{only|not}4 available on %2%select{ %3 or newer|}4">;
def note_silence_aligned_allocation_unavailable : Note<
"if you supply your own aligned allocation functions, use "
"-faligned-allocation to silence this diagnostic">;
def err_conditional_void_nonvoid : Error<
"%select{left|right}1 operand to ? is void, but %select{right|left}1 operand "
"is of type %0">;
def err_conditional_ambiguous : Error<
"conditional expression is ambiguous; "
"%diff{$ can be converted to $ and vice versa|"
"types can be convert to each other}0,1">;
def err_conditional_ambiguous_ovl : Error<
"conditional expression is ambiguous; %diff{$ and $|types}0,1 "
"can be converted to several common types">;
def err_conditional_vector_size : Error<
"vector condition type %0 and result type %1 do not have the same number "
"of elements">;
def err_conditional_vector_element_size : Error<
"vector condition type %0 and result type %1 do not have elements of the "
"same size">;
def err_conditional_vector_has_void : Error<
"GNU vector conditional operand cannot be %select{void|a throw expression}0">;
def err_conditional_vector_operand_type
: Error<"enumeration type %0 is not allowed in a vector conditional">;
def err_conditional_vector_cond_result_mismatch
: Error<"cannot mix vectors and extended vectors in a vector conditional">;
def err_conditional_vector_mismatched
    : Error<"vector operands to the vector conditional must be the same type "
            "%diff{($ and $)|}0,1">;
def err_throw_incomplete : Error<
"cannot throw object of incomplete type %0">;
def err_throw_incomplete_ptr : Error<
"cannot throw pointer to object of incomplete type %0">;
def err_throw_sizeless : Error<
"cannot throw object of sizeless type %0">;
def warn_throw_underaligned_obj : Warning<
"underaligned exception object thrown">,
InGroup<UnderalignedExceptionObject>;
def note_throw_underaligned_obj : Note<
"required alignment of type %0 (%1 bytes) is larger than the supported "
"alignment of C++ exception objects on this target (%2 bytes)">;
def err_return_in_constructor_handler : Error<
"return in the catch of a function try block of a constructor is illegal">;
def warn_cdtor_function_try_handler_mem_expr : Warning<
"cannot refer to a non-static member from the handler of a "
"%select{constructor|destructor}0 function try block">, InGroup<Exceptions>;
let CategoryName = "Lambda Issue" in {
def err_capture_more_than_once : Error<
"%0 can appear only once in a capture list">;
def err_reference_capture_with_reference_default : Error<
"'&' cannot precede a capture when the capture default is '&'">;
def err_copy_capture_with_copy_default : Error<
"'&' must precede a capture when the capture default is '='">;
def err_capture_does_not_name_variable : Error<
"%0 in capture list does not name a variable">;
def err_capture_non_automatic_variable : Error<
"%0 cannot be captured because it does not have automatic storage "
"duration">;
def err_this_capture : Error<
"'this' cannot be %select{implicitly |}0captured in this context">;
def note_lambda_this_capture_fixit : Note<
"explicitly capture 'this'">;
def err_lambda_capture_anonymous_var : Error<
"unnamed variable cannot be implicitly captured in a lambda expression">;
def err_lambda_capture_flexarray_type : Error<
"variable %0 with flexible array member cannot be captured in "
"a lambda expression">;
def err_lambda_impcap : Error<
"variable %0 cannot be implicitly captured in a lambda with no "
"capture-default specified">;
def note_lambda_variable_capture_fixit : Note<
"capture %0 by %select{value|reference}1">;
def note_lambda_default_capture_fixit : Note<
"default capture by %select{value|reference}0">;
def note_lambda_decl : Note<"lambda expression begins here">;
def err_lambda_unevaluated_operand : Error<
"lambda expression in an unevaluated operand">;
def err_lambda_in_constant_expression : Error<
"a lambda expression may not appear inside of a constant expression">;
def err_lambda_in_invalid_context : Error<
"a lambda expression cannot appear in this context">;
def err_lambda_return_init_list : Error<
"cannot deduce lambda return type from initializer list">;
def err_lambda_capture_default_arg : Error<
"lambda expression in default argument cannot capture any entity">;
def err_lambda_incomplete_result : Error<
"incomplete result type %0 in lambda expression">;
def err_noreturn_lambda_has_return_expr : Error<
"lambda declared 'noreturn' should not return">;
def warn_maybe_falloff_nonvoid_lambda : Warning<
"non-void lambda does not return a value in all control paths">,
InGroup<ReturnType>;
def warn_falloff_nonvoid_lambda : Warning<
"non-void lambda does not return a value">,
InGroup<ReturnType>;
def err_access_lambda_capture : Error<
  // The *ERROR* entries represent other special members that aren't
  // constructors, in the hope that someone will notice and report if they
  // ever appear.
"capture of variable '%0' as type %1 calls %select{private|protected}3 "
"%select{default |copy |move |*ERROR* |*ERROR* |*ERROR* |}2constructor">,
AccessControl;
def note_lambda_to_block_conv : Note<
"implicit capture of lambda object due to conversion to block pointer "
"here">;
def note_var_explicitly_captured_here : Note<"variable %0 is"
"%select{| explicitly}1 captured here">;
// C++14 lambda init-captures.
def warn_cxx11_compat_init_capture : Warning<
"initialized lambda captures are incompatible with C++ standards "
"before C++14">, InGroup<CXXPre14Compat>, DefaultIgnore;
def ext_init_capture : ExtWarn<
"initialized lambda captures are a C++14 extension">, InGroup<CXX14>;
def err_init_capture_no_expression : Error<
"initializer missing for lambda capture %0">;
def err_init_capture_multiple_expressions : Error<
"initializer for lambda capture %0 contains multiple expressions">;
def err_init_capture_paren_braces : Error<
"cannot deduce type for lambda capture %1 from "
"%select{parenthesized|nested}0 initializer list">;
def err_init_capture_deduction_failure : Error<
"cannot deduce type for lambda capture %0 from initializer of type %2">;
def err_init_capture_deduction_failure_from_init_list : Error<
"cannot deduce type for lambda capture %0 from initializer list">;
def warn_cxx17_compat_init_capture_pack : Warning<
"initialized lambda capture packs are incompatible with C++ standards "
"before C++20">, InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_init_capture_pack : ExtWarn<
"initialized lambda pack captures are a C++20 extension">, InGroup<CXX20>;
// C++14 generic lambdas.
def warn_cxx11_compat_generic_lambda : Warning<
"generic lambdas are incompatible with C++11">,
InGroup<CXXPre14Compat>, DefaultIgnore;
// C++17 '*this' captures.
def warn_cxx14_compat_star_this_lambda_capture : Warning<
"by value capture of '*this' is incompatible with C++ standards before C++17">,
InGroup<CXXPre17Compat>, DefaultIgnore;
def ext_star_this_lambda_capture_cxx17 : ExtWarn<
"capture of '*this' by copy is a C++17 extension">, InGroup<CXX17>;
// C++17 parameter shadows capture
def err_parameter_shadow_capture : Error<
"a lambda parameter cannot shadow an explicitly captured entity">;
// C++20 [=, this] captures.
def warn_cxx17_compat_equals_this_lambda_capture : Warning<
"explicit capture of 'this' with a capture default of '=' is incompatible "
"with C++ standards before C++20">, InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_equals_this_lambda_capture_cxx20 : ExtWarn<
"explicit capture of 'this' with a capture default of '=' "
"is a C++20 extension">, InGroup<CXX20>;
def warn_deprecated_this_capture : Warning<
"implicit capture of 'this' with a capture default of '=' is deprecated">,
InGroup<DeprecatedThisCapture>, DefaultIgnore;
def note_deprecated_this_capture : Note<
"add an explicit capture of 'this' to capture '*this' by reference">;
// C++20 default constructible / assignable lambdas.
def warn_cxx17_compat_lambda_def_ctor_assign : Warning<
"%select{default construction|assignment}0 of lambda is incompatible with "
"C++ standards before C++20">, InGroup<CXXPre20Compat>, DefaultIgnore;
}
def err_return_in_captured_stmt : Error<
"cannot return from %0">;
def err_capture_block_variable : Error<
"__block variable %0 cannot be captured in a "
"%select{lambda expression|captured statement}1">;
def err_operator_arrow_circular : Error<
"circular pointer delegation detected">;
def err_operator_arrow_depth_exceeded : Error<
"use of 'operator->' on type %0 would invoke a sequence of more than %1 "
"'operator->' calls">;
def note_operator_arrow_here : Note<
"'operator->' declared here produces an object of type %0">;
def note_operator_arrows_suppressed : Note<
"(skipping %0 'operator->'%s0 in backtrace)">;
def note_operator_arrow_depth : Note<
"use -foperator-arrow-depth=N to increase 'operator->' limit">;
def err_pseudo_dtor_base_not_scalar : Error<
"object expression of non-scalar type %0 cannot be used in a "
"pseudo-destructor expression">;
def ext_pseudo_dtor_on_void : ExtWarn<
"pseudo-destructors on type void are a Microsoft extension">,
InGroup<MicrosoftVoidPseudoDtor>;
def err_pseudo_dtor_type_mismatch : Error<
"the type of object expression "
"%diff{($) does not match the type being destroyed ($)|"
"does not match the type being destroyed}0,1 "
"in pseudo-destructor expression">;
def err_pseudo_dtor_call_with_args : Error<
"call to pseudo-destructor cannot have any arguments">;
def err_dtor_expr_without_call : Error<
"reference to %select{destructor|pseudo-destructor}0 must be called"
"%select{|; did you mean to call it with no arguments?}1">;
def err_pseudo_dtor_destructor_non_type : Error<
"%0 does not refer to a type name in pseudo-destructor expression; expected "
"the name of type %1">;
def err_invalid_use_of_function_type : Error<
"a function type is not allowed here">;
def err_invalid_use_of_array_type : Error<"an array type is not allowed here">;
def err_typecheck_bool_condition : Error<
"value of type %0 is not contextually convertible to 'bool'">;
def err_typecheck_ambiguous_condition : Error<
"conversion %diff{from $ to $|between types}0,1 is ambiguous">;
def err_typecheck_nonviable_condition : Error<
"no viable conversion%select{%diff{ from $ to $|}1,2|"
"%diff{ from returned value of type $ to function return type $|}1,2}0">;
def err_typecheck_nonviable_condition_incomplete : Error<
"no viable conversion%diff{ from $ to incomplete type $|}0,1">;
def err_typecheck_deleted_function : Error<
"conversion function %diff{from $ to $|between types}0,1 "
"invokes a deleted function">;
def err_expected_class_or_namespace : Error<"%0 is not a class"
"%select{ or namespace|, namespace, or enumeration}1">;
def err_invalid_declarator_scope : Error<"cannot define or redeclare %0 here "
"because namespace %1 does not enclose namespace %2">;
def err_export_non_namespace_scope_name : Error<
"cannot export %0 as it is not at namespace scope">;
def err_redeclaration_non_exported : Error <
"cannot export redeclaration %0 here since the previous declaration "
"%select{is not exported|has internal linkage|has module linkage}1">;
def err_invalid_declarator_global_scope : Error<
"definition or redeclaration of %0 cannot name the global scope">;
def err_invalid_declarator_in_function : Error<
"definition or redeclaration of %0 not allowed inside a function">;
def err_invalid_declarator_in_block : Error<
"definition or redeclaration of %0 not allowed inside a block">;
def err_not_tag_in_scope : Error<
"no %select{struct|interface|union|class|enum}0 named %1 in %2">;
def err_no_typeid_with_fno_rtti : Error<
"use of typeid requires -frtti">;
def err_no_dynamic_cast_with_fno_rtti : Error<
"use of dynamic_cast requires -frtti">;
def warn_no_dynamic_cast_with_rtti_disabled: Warning<
"dynamic_cast will not work since RTTI data is disabled by "
"%select{-fno-rtti-data|/GR-}0">, InGroup<RTTI>;
def warn_no_typeid_with_rtti_disabled: Warning<
"typeid will not work since RTTI data is disabled by "
"%select{-fno-rtti-data|/GR-}0">, InGroup<RTTI>;
def err_cannot_form_pointer_to_member_of_reference_type : Error<
"cannot form a pointer-to-member to member %0 of reference type %1">;
def err_incomplete_object_call : Error<
"incomplete type in call to object of type %0">;
def warn_condition_is_assignment : Warning<"using the result of an "
"assignment as a condition without parentheses">,
InGroup<Parentheses>;
def warn_free_nonheap_object
    : Warning<"attempt to call %0 on non-heap "
              "%select{object %2|object: block expression|"
              "object: lambda-to-function-pointer conversion}1">,
      InGroup<FreeNonHeapObject>;
// Completely identical except off by default.
def warn_condition_is_idiomatic_assignment : Warning<"using the result "
"of an assignment as a condition without parentheses">,
InGroup<DiagGroup<"idiomatic-parentheses">>, DefaultIgnore;
def note_condition_assign_to_comparison : Note<
"use '==' to turn this assignment into an equality comparison">;
def note_condition_or_assign_to_comparison : Note<
"use '!=' to turn this compound assignment into an inequality comparison">;
def note_condition_assign_silence : Note<
"place parentheses around the assignment to silence this warning">;
def warn_equality_with_extra_parens : Warning<"equality comparison with "
"extraneous parentheses">, InGroup<ParenthesesOnEquality>;
def note_equality_comparison_to_assign : Note<
"use '=' to turn this equality comparison into an assignment">;
def note_equality_comparison_silence : Note<
"remove extraneous parentheses around the comparison to silence this warning">;
// assignment related diagnostics (also for argument passing, returning, etc).
// In most of these diagnostics the %2 is a value from the
// Sema::AssignmentAction enumeration
def err_typecheck_convert_incompatible : Error<
"%select{%diff{assigning to $ from incompatible type $|"
"assigning to type from incompatible type}0,1"
"|%diff{passing $ to parameter of incompatible type $|"
"passing type to parameter of incompatible type}0,1"
"|%diff{returning $ from a function with incompatible result type $|"
"returning type from a function with incompatible result type}0,1"
"|%diff{converting $ to incompatible type $|"
"converting type to incompatible type}0,1"
"|%diff{initializing $ with an expression of incompatible type $|"
"initializing type with an expression of incompatible type}0,1"
"|%diff{sending $ to parameter of incompatible type $|"
"sending type to parameter of incompatible type}0,1"
"|%diff{casting $ to incompatible type $|"
"casting type to incompatible type}0,1}2"
"%select{|; dereference with *|"
"; take the address with &|"
"; remove *|"
"; remove &}3"
"%select{|: different classes%diff{ ($ vs $)|}5,6"
"|: different number of parameters (%5 vs %6)"
"|: type mismatch at %ordinal5 parameter%diff{ ($ vs $)|}6,7"
"|: different return type%diff{ ($ vs $)|}5,6"
"|: different qualifiers (%5 vs %6)"
"|: different exception specifications}4">;
def err_typecheck_missing_return_type_incompatible : Error<
"%diff{return type $ must match previous return type $|"
"return type must match previous return type}0,1 when %select{block "
"literal|lambda expression}2 has unspecified explicit return type">;
def note_incomplete_class_and_qualified_id : Note<
"conformance of forward class %0 to protocol %1 can not be confirmed">;
def warn_incompatible_qualified_id : Warning<
"%select{%diff{assigning to $ from incompatible type $|"
"assigning to type from incompatible type}0,1"
"|%diff{passing $ to parameter of incompatible type $|"
"passing type to parameter of incompatible type}0,1"
"|%diff{returning $ from a function with incompatible result type $|"
"returning type from a function with incompatible result type}0,1"
"|%diff{converting $ to incompatible type $|"
"converting type to incompatible type}0,1"
"|%diff{initializing $ with an expression of incompatible type $|"
"initializing type with an expression of incompatible type}0,1"
"|%diff{sending $ to parameter of incompatible type $|"
"sending type to parameter of incompatible type}0,1"
"|%diff{casting $ to incompatible type $|"
"casting type to incompatible type}0,1}2">;
def err_incompatible_qualified_id : Error<
"%select{%diff{assigning to $ from incompatible type $|"
"assigning to type from incompatible type}0,1"
"|%diff{passing $ to parameter of incompatible type $|"
"passing type to parameter of incompatible type}0,1"
"|%diff{returning $ from a function with incompatible result type $|"
"returning type from a function with incompatible result type}0,1"
"|%diff{converting $ to incompatible type $|"
"converting type to incompatible type}0,1"
"|%diff{initializing $ with an expression of incompatible type $|"
"initializing type with an expression of incompatible type}0,1"
"|%diff{sending $ to parameter of incompatible type $|"
"sending type to parameter of incompatible type}0,1"
"|%diff{casting $ to incompatible type $|"
"casting type to incompatible type}0,1}2">;
def err_typecheck_convert_pointer_int : Error<
"incompatible pointer to integer conversion "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
"%select{|; dereference with *|"
"; take the address with &|"
"; remove *|"
"; remove &}3">;
def ext_typecheck_convert_pointer_int : ExtWarn<
err_typecheck_convert_pointer_int.Summary>,
InGroup<IntConversion>, DefaultError;
def err_typecheck_convert_int_pointer : Error<
"incompatible integer to pointer conversion "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
"%select{|; dereference with *|"
"; take the address with &|"
"; remove *|"
"; remove &}3">;
def ext_typecheck_convert_int_pointer : ExtWarn<
err_typecheck_convert_int_pointer.Summary>,
InGroup<IntConversion>, DefaultError;
def ext_typecheck_convert_pointer_void_func : Extension<
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" converts between void pointer and function pointer">;
def err_typecheck_convert_pointer_void_func : Error<
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" converts between void pointer and function pointer">;
def ext_typecheck_convert_incompatible_pointer_sign : ExtWarn<
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" converts between pointers to integer types %select{with different sign|"
"where one is of the unique plain 'char' type and the other is not}3">,
InGroup<DiagGroup<"pointer-sign">>;
def err_typecheck_convert_incompatible_pointer_sign :
Error<ext_typecheck_convert_incompatible_pointer_sign.Summary>;
def ext_typecheck_convert_incompatible_pointer : ExtWarn<
"incompatible pointer types "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
"%select{|; dereference with *|"
"; take the address with &|"
"; remove *|"
"; remove &}3">,
InGroup<IncompatiblePointerTypes>;
def err_typecheck_convert_incompatible_pointer : Error<
"incompatible pointer types "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
"%select{|; dereference with *|"
"; take the address with &|"
"; remove *|"
"; remove &}3">;
def err_typecheck_convert_incompatible_function_pointer : Error<
"incompatible function pointer types "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
"%select{|; dereference with *|"
"; take the address with &|"
"; remove *|"
"; remove &}3">;
def ext_typecheck_convert_incompatible_function_pointer : ExtWarn<
err_typecheck_convert_incompatible_function_pointer.Summary>,
InGroup<IncompatibleFunctionPointerTypes>, DefaultError;
def warn_typecheck_convert_incompatible_function_pointer_strict : Warning<
err_typecheck_convert_incompatible_function_pointer.Summary>,
InGroup<DiagGroup<"incompatible-function-pointer-types-strict">>, DefaultIgnore;
def ext_typecheck_convert_discards_qualifiers : ExtWarn<
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" discards qualifiers">,
InGroup<IncompatiblePointerTypesDiscardsQualifiers>;
def err_typecheck_convert_discards_qualifiers : Error<
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" discards qualifiers">;
def ext_nested_pointer_qualifier_mismatch : ExtWarn<
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" discards qualifiers in nested pointer types">,
InGroup<IncompatiblePointerTypesDiscardsQualifiers>;
def err_nested_pointer_qualifier_mismatch : Error<
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" discards qualifiers in nested pointer types">;
def warn_incompatible_vectors : Warning<
"incompatible vector types "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2">,
InGroup<VectorConversion>, DefaultIgnore;
def err_incompatible_vectors : Error<
"incompatible vector types "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2">;
def err_int_to_block_pointer : Error<
"invalid block pointer conversion "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2">;
def err_typecheck_convert_incompatible_block_pointer : Error<
"incompatible block pointer types "
"%select{%diff{assigning to $ from $|assigning to different types}0,1"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2">;
def err_typecheck_incompatible_address_space : Error<
"%select{%diff{assigning $ to $|assigning to different types}1,0"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" changes address space of pointer">;
def err_typecheck_incompatible_nested_address_space : Error<
"%select{%diff{assigning $ to $|assigning to different types}1,0"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" changes address space of nested pointer">;
def err_typecheck_incompatible_ownership : Error<
"%select{%diff{assigning $ to $|assigning to different types}1,0"
"|%diff{passing $ to parameter of type $|"
"passing to parameter of different type}0,1"
"|%diff{returning $ from a function with result type $|"
"returning from function with different return type}0,1"
"|%diff{converting $ to type $|converting between types}0,1"
"|%diff{initializing $ with an expression of type $|"
"initializing with expression of different type}0,1"
"|%diff{sending $ to parameter of type $|"
"sending to parameter of different type}0,1"
"|%diff{casting $ to type $|casting between types}0,1}2"
" changes retain/release properties of pointer">;
def err_typecheck_comparison_of_distinct_blocks : Error<
"comparison of distinct block types%diff{ ($ and $)|}0,1">;
def err_typecheck_array_not_modifiable_lvalue : Error<
"array type %0 is not assignable">;
def err_typecheck_non_object_not_modifiable_lvalue : Error<
"non-object type %0 is not assignable">;
def err_typecheck_expression_not_modifiable_lvalue : Error<
"expression is not assignable">;
def err_typecheck_incomplete_type_not_modifiable_lvalue : Error<
"incomplete type %0 is not assignable">;
def err_typecheck_lvalue_casts_not_supported : Error<
"assignment to cast is illegal, lvalue casts are not supported">;
def err_typecheck_duplicate_vector_components_not_mlvalue : Error<
"vector is not assignable (contains duplicate components)">;
def err_block_decl_ref_not_modifiable_lvalue : Error<
"variable is not assignable (missing __block type specifier)">;
def err_lambda_decl_ref_not_modifiable_lvalue : Error<
"cannot assign to a variable captured by copy in a non-mutable lambda">;
def err_typecheck_call_not_function : Error<
"called object type %0 is not a function or function pointer">;
def err_call_incomplete_return : Error<
"calling function with incomplete return type %0">;
def err_call_function_incomplete_return : Error<
"calling %0 with incomplete return type %1">;
def err_call_incomplete_argument : Error<
"argument type %0 is incomplete">;
def err_typecheck_call_too_few_args : Error<
"too few %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected %1, have %2">;
def err_typecheck_call_too_few_args_one : Error<
"too few %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"single argument %1 was not specified">;
def err_typecheck_call_too_few_args_at_least : Error<
"too few %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected at least %1, have %2">;
def err_typecheck_call_too_few_args_at_least_one : Error<
"too few %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"at least argument %1 must be specified">;
def err_typecheck_call_too_few_args_suggest : Error<
"too few %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected %1, have %2; did you mean %3?">;
def err_typecheck_call_too_few_args_at_least_suggest : Error<
"too few %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected at least %1, have %2; did you mean %3?">;
def err_typecheck_call_too_many_args : Error<
"too many %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected %1, have %2">;
def err_typecheck_call_too_many_args_one : Error<
"too many %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected single argument %1, have %2 arguments">;
def err_typecheck_call_too_many_args_at_most : Error<
"too many %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected at most %1, have %2">;
def err_typecheck_call_too_many_args_at_most_one : Error<
"too many %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected at most single argument %1, have %2 arguments">;
def err_typecheck_call_too_many_args_suggest : Error<
"too many %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected %1, have %2; did you mean %3?">;
def err_typecheck_call_too_many_args_at_most_suggest : Error<
"too many %select{|||execution configuration }0arguments to "
"%select{function|block|method|kernel function}0 call, "
"expected at most %1, have %2; did you mean %3?">;
def err_arc_typecheck_convert_incompatible_pointer : Error<
  "incompatible pointer types passing retainable parameter of type %0 "
  "to a CF function expecting %1 type">;
def err_builtin_fn_use : Error<"builtin functions must be directly called">;
def warn_call_wrong_number_of_arguments : Warning<
"too %select{few|many}0 arguments in call to %1">;
def err_atomic_builtin_must_be_pointer : Error<
"address argument to atomic builtin must be a pointer (%0 invalid)">;
def err_atomic_builtin_must_be_pointer_intptr : Error<
"address argument to atomic builtin must be a pointer to integer or pointer"
" (%0 invalid)">;
def err_atomic_builtin_cannot_be_const : Error<
"address argument to atomic builtin cannot be const-qualified (%0 invalid)">;
def err_atomic_builtin_must_be_pointer_intfltptr : Error<
"address argument to atomic builtin must be a pointer to integer,"
" floating-point or pointer (%0 invalid)">;
def err_atomic_builtin_pointer_size : Error<
  "address argument to atomic builtin must be a pointer to 1, 2, 4, 8, or "
  "16 byte type (%0 invalid)">;
def err_atomic_exclusive_builtin_pointer_size : Error<
  "address argument to load or store exclusive builtin must be a pointer to"
  " 1, 2, 4, or 8 byte type (%0 invalid)">;
def err_atomic_builtin_ext_int_size : Error<
  "atomic memory operand must have a power-of-two size">;
def err_atomic_builtin_bit_int_prohibit : Error<
"argument to atomic builtin of type '_BitInt' is not supported">;
def err_atomic_op_needs_atomic : Error<
"address argument to atomic operation must be a pointer to _Atomic "
"type (%0 invalid)">;
def err_atomic_op_needs_non_const_atomic : Error<
"address argument to atomic operation must be a pointer to non-%select{const|constant}0 _Atomic "
"type (%1 invalid)">;
def err_atomic_op_needs_non_const_pointer : Error<
"address argument to atomic operation must be a pointer to non-const "
"type (%0 invalid)">;
def err_atomic_op_needs_trivial_copy : Error<
"address argument to atomic operation must be a pointer to a "
"trivially-copyable type (%0 invalid)">;
def err_atomic_op_needs_atomic_int_ptr_or_fp : Error<
"address argument to atomic operation must be a pointer to %select{|atomic }0"
"integer, pointer or supported floating point type (%1 invalid)">;
def err_atomic_op_needs_atomic_int_or_ptr : Error<
"address argument to atomic operation must be a pointer to %select{|atomic }0"
"integer or pointer (%1 invalid)">;
def err_atomic_op_needs_atomic_int : Error<
"address argument to atomic operation must be a pointer to "
"%select{|atomic }0integer (%1 invalid)">;
def warn_atomic_op_has_invalid_memory_order : Warning<
"memory order argument to atomic operation is invalid">,
InGroup<DiagGroup<"atomic-memory-ordering">>;
def err_atomic_op_has_invalid_synch_scope : Error<
"synchronization scope argument to atomic operation is invalid">;
def warn_atomic_implicit_seq_cst : Warning<
"implicit use of sequentially-consistent atomic may incur stronger memory barriers than necessary">,
InGroup<DiagGroup<"atomic-implicit-seq-cst">>, DefaultIgnore;
def err_overflow_builtin_must_be_int : Error<
"operand argument to overflow builtin must be an integer (%0 invalid)">;
def err_overflow_builtin_must_be_ptr_int : Error<
"result argument to overflow builtin must be a pointer "
"to a non-const integer (%0 invalid)">;
def err_overflow_builtin_bit_int_max_size : Error<
"__builtin_mul_overflow does not support 'signed _BitInt' operands of more "
"than %0 bits">;
def err_expected_struct_pointer_argument : Error<
"expected pointer to struct as %ordinal0 argument to %1, found %2">;
def err_expected_callable_argument : Error<
"expected a callable expression as %ordinal0 argument to %1, found %2">;
def note_building_builtin_dump_struct_call : Note<
"in call to printing function with arguments '(%0)' while dumping struct">;
def err_atomic_load_store_uses_lib : Error<
"atomic %select{load|store}0 requires runtime support that is not "
"available for this target">;
def err_nontemporal_builtin_must_be_pointer : Error<
"address argument to nontemporal builtin must be a pointer (%0 invalid)">;
def err_nontemporal_builtin_must_be_pointer_intfltptr_or_vector : Error<
"address argument to nontemporal builtin must be a pointer to integer, float, "
"pointer, or a vector of such types (%0 invalid)">;
def err_deleted_function_use : Error<"attempt to use a deleted function">;
def err_deleted_inherited_ctor_use : Error<
"constructor inherited by %0 from base class %1 is implicitly deleted">;
def note_called_by : Note<"called by %0">;
def err_kern_type_not_void_return : Error<
"kernel function type %0 must have void return type">;
def err_kern_is_nonstatic_method : Error<
"kernel function %0 must be a free function or static member function">;
def err_config_scalar_return : Error<
"CUDA special function '%0' must have scalar return type">;
def err_kern_call_not_global_function : Error<
"kernel call to non-global function %0">;
def err_global_call_not_config : Error<
"call to global function %0 not configured">;
def err_ref_bad_target : Error<
"reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
"%select{function|variable}1 %2 in %select{__device__|__global__|__host__|__host__ __device__}3 function">;
def note_cuda_const_var_unpromoted : Note<
"const variable cannot be emitted on device side due to dynamic initialization">;
def note_cuda_host_var : Note<
"host variable declared here">;
def err_ref_bad_target_global_initializer : Error<
"reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
"function %1 in global initializer">;
def err_capture_bad_target : Error<
  "cannot capture host variable %0 by reference in a device or host device "
  "lambda function">;
def warn_maybe_capture_bad_target_this_ptr : Warning<
  "capturing a host-side class data member via 'this' pointer in a device or "
  "host device lambda function may result in invalid memory access if the "
  "'this' pointer is not accessible on the device side">,
  InGroup<DiagGroup<"gpu-maybe-wrong-side">>;
def warn_kern_is_method : Extension<
"kernel function %0 is a member function; this may not be accepted by nvcc">,
InGroup<CudaCompat>;
def warn_kern_is_inline : Warning<
"ignored 'inline' attribute on kernel function %0">,
InGroup<CudaCompat>;
def err_variadic_device_fn : Error<
"CUDA device code does not support variadic functions">;
def err_va_arg_in_device : Error<
"CUDA device code does not support va_arg">;
def err_alias_not_supported_on_nvptx : Error<"CUDA does not support aliases">;
def err_cuda_unattributed_constexpr_cannot_overload_device : Error<
  "constexpr function %0 without __host__ or __device__ attributes cannot "
  "overload __device__ function with the same signature; add a __host__ "
  "attribute, or build with -fno-cuda-host-device-constexpr">;
def note_cuda_conflicting_device_function_declared_here : Note<
"conflicting __device__ function declared here">;
def err_cuda_device_exceptions : Error<
"cannot use '%0' in "
"%select{__device__|__global__|__host__|__host__ __device__}1 function">;
def err_dynamic_var_init : Error<
  "dynamic initialization is not supported for "
  "__device__, __constant__, __shared__, and __managed__ variables">;
def err_shared_var_init : Error<
  "initialization is not supported for __shared__ variables">;
def err_cuda_vla : Error<
"cannot use variable-length arrays in "
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
def err_cuda_extern_shared : Error<"__shared__ variable %0 cannot be 'extern'">;
def err_cuda_host_shared : Error<
"__shared__ local variables not allowed in "
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
"__managed__ are not allowed on non-static local variables">;
def err_cuda_ovl_target : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;
def note_cuda_ovl_candidate_target_mismatch : Note<
"candidate template ignored: target attributes do not match">;
def err_cuda_device_builtin_surftex_cls_template : Error<
"illegal device builtin %select{surface|texture}0 reference "
"class template %1 declared here">;
def note_cuda_device_builtin_surftex_cls_should_have_n_args : Note<
"%0 needs to have exactly %1 template parameters">;
def note_cuda_device_builtin_surftex_cls_should_have_match_arg : Note<
"the %select{1st|2nd|3rd}1 template parameter of %0 needs to be "
"%select{a type|an integer or enum value}2">;
def err_cuda_device_builtin_surftex_ref_decl : Error<
"illegal device builtin %select{surface|texture}0 reference "
"type %1 declared here">;
def note_cuda_device_builtin_surftex_should_be_template_class : Note<
"%0 needs to be instantiated from a class template with proper "
"template arguments">;
def err_hip_invalid_args_builtin_mangled_name : Error<
"invalid argument: symbol must be a device-side function or global variable">;
def warn_non_pod_vararg_with_format_string : Warning<
"cannot pass %select{non-POD|non-trivial}0 object of type %1 to variadic "
"%select{function|block|method|constructor}2; expected type from format "
"string was %3">, InGroup<NonPODVarargs>, DefaultError;
// The arguments to this diagnostic should match the warning above.
def err_cannot_pass_objc_interface_to_vararg_format : Error<
"cannot pass object with interface type %1 by value to variadic "
"%select{function|block|method|constructor}2; expected type from format "
"string was %3">;
def err_cannot_pass_non_trivial_c_struct_to_vararg : Error<
"cannot pass non-trivial C object of type %0 by value to variadic "
"%select{function|block|method|constructor}1">;
def err_cannot_pass_objc_interface_to_vararg : Error<
"cannot pass object with interface type %0 by value through variadic "
"%select{function|block|method|constructor}1">;
def warn_cannot_pass_non_pod_arg_to_vararg : Warning<
"cannot pass object of %select{non-POD|non-trivial}0 type %1 through variadic"
" %select{function|block|method|constructor}2; call will abort at runtime">,
InGroup<NonPODVarargs>, DefaultError;
def warn_cxx98_compat_pass_non_pod_arg_to_vararg : Warning<
"passing object of trivial but non-POD type %0 through variadic"
" %select{function|block|method|constructor}1 is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def warn_pass_class_arg_to_vararg : Warning<
"passing object of class type %0 through variadic "
"%select{function|block|method|constructor}1"
"%select{|; did you mean to call '%3'?}2">,
InGroup<ClassVarargs>, DefaultIgnore;
def err_cannot_pass_to_vararg : Error<
"cannot pass %select{expression of type %1|initializer list}0 to variadic "
"%select{function|block|method|constructor}2">;
def err_cannot_pass_to_vararg_format : Error<
"cannot pass %select{expression of type %1|initializer list}0 to variadic "
"%select{function|block|method|constructor}2; expected type from format "
"string was %3">;
def err_typecheck_call_invalid_ordered_compare : Error<
  "ordered compare requires two arguments of floating point type"
  "%diff{ ($ and $)|}0,1">;
def err_typecheck_call_invalid_unary_fp : Error<
"floating point classification requires argument of floating point type "
"(passed in %0)">;
def err_typecheck_cond_expect_int_float : Error<
"used type %0 where integer or floating point type is required">;
def err_typecheck_cond_expect_scalar : Error<
"used type %0 where arithmetic or pointer type is required">;
def err_typecheck_cond_expect_nonfloat : Error<
"used type %0 where floating point type is not allowed">;
def ext_typecheck_cond_one_void : Extension<
"C99 forbids conditional expressions with only one void side">;
def err_typecheck_cast_to_incomplete : Error<
"cast to incomplete type %0">;
def ext_typecheck_cast_nonscalar : Extension<
"C99 forbids casting nonscalar type %0 to the same type">;
def ext_typecheck_cast_to_union : Extension<
"cast to union type is a GNU extension">,
InGroup<GNUUnionCast>;
def err_typecheck_cast_to_union_no_type : Error<
"cast to union type from type %0 not present in union">;
def err_cast_pointer_from_non_pointer_int : Error<
"operand of type %0 cannot be cast to a pointer type">;
def warn_cast_pointer_from_sel : Warning<
"cast of type %0 to %1 is deprecated; use sel_getName instead">,
InGroup<SelTypeCast>;
def warn_function_def_in_objc_container : Warning<
"function definition inside an Objective-C container is deprecated">,
InGroup<FunctionDefInObjCContainer>;
def err_typecheck_call_requires_real_fp : Error<
"argument type %0 is not a real floating point type">;
def err_typecheck_call_different_arg_types : Error<
"arguments are of different types%diff{ ($ vs $)|}0,1">;
def warn_cast_calling_conv : Warning<
"cast between incompatible calling conventions '%0' and '%1'; "
"calls through this pointer may abort at runtime">,
InGroup<DiagGroup<"cast-calling-convention">>;
def note_change_calling_conv_fixit : Note<
"consider defining %0 with the '%1' calling convention">;
def warn_bad_function_cast : Warning<
"cast from function call of type %0 to non-matching type %1">,
InGroup<BadFunctionCast>, DefaultIgnore;
def warn_cast_function_type : Warning<
"cast %diff{from $ to $ |}0,1converts to incompatible function type">,
InGroup<CastFunctionType>, DefaultIgnore;
def warn_cast_function_type_strict : Warning<warn_cast_function_type.Summary>,
InGroup<CastFunctionTypeStrict>, DefaultIgnore;
def err_cast_pointer_to_non_pointer_int : Error<
"pointer cannot be cast to type %0">;
def err_nullptr_cast : Error<
"cannot cast an object of type %select{'nullptr_t' to %1|%1 to 'nullptr_t'}0"
>;
def err_cast_to_bfloat16 : Error<"cannot type-cast to __bf16">;
def err_cast_from_bfloat16 : Error<"cannot type-cast from __bf16">;
def err_typecheck_expect_scalar_operand : Error<
"operand of type %0 where arithmetic or pointer type is required">;
def err_typecheck_cond_incompatible_operands : Error<
"incompatible operand types%diff{ ($ and $)|}0,1">;
def err_typecheck_expect_flt_or_vector : Error<
"invalid operand of type %0 where floating, complex or "
"a vector of such types is required">;
def err_cast_selector_expr : Error<
"cannot type cast @selector expression">;
def err_make_signed_integral_only : Error<
"'%select{make_unsigned|make_signed}0' is only compatible with "
"non-%select{bool|_BitInt(1)}1 integers and enum types, but was given "
"%2%select{| whose underlying type is %4}3">;
def ext_typecheck_cond_incompatible_pointers : ExtWarn<
"pointer type mismatch%diff{ ($ and $)|}0,1">,
InGroup<DiagGroup<"pointer-type-mismatch">>;
def ext_typecheck_cond_pointer_integer_mismatch : ExtWarn<
"pointer/integer type mismatch in conditional expression"
"%diff{ ($ and $)|}0,1">,
InGroup<DiagGroup<"conditional-type-mismatch">>;
def err_typecheck_choose_expr_requires_constant : Error<
"'__builtin_choose_expr' requires a constant expression">;
def warn_unused_expr : Warning<"expression result unused">,
InGroup<UnusedValue>;
def warn_unused_comma_left_operand : Warning<
"left operand of comma operator has no effect">,
InGroup<UnusedValue>;
def warn_unused_voidptr : Warning<
"expression result unused; should this cast be to 'void'?">,
InGroup<UnusedValue>;
def warn_unused_property_expr : Warning<
"property access result unused - getters should not be used for side effects">,
InGroup<UnusedGetterReturnValue>;
def warn_unused_container_subscript_expr : Warning<
"container access result unused - container access should not be used for side effects">,
InGroup<UnusedValue>;
def warn_unused_call : Warning<
"ignoring return value of function declared with %0 attribute">,
InGroup<UnusedValue>;
def warn_unused_constructor : Warning<
"ignoring temporary created by a constructor declared with %0 attribute">,
InGroup<UnusedValue>;
def warn_unused_constructor_msg : Warning<
"ignoring temporary created by a constructor declared with %0 attribute: %1">,
InGroup<UnusedValue>;
def warn_side_effects_unevaluated_context : Warning<
"expression with side effects has no effect in an unevaluated context">,
InGroup<UnevaluatedExpression>;
def warn_side_effects_typeid : Warning<
"expression with side effects will be evaluated despite being used as an "
"operand to 'typeid'">, InGroup<PotentiallyEvaluatedExpression>;
def warn_unused_result : Warning<
"ignoring return value of function declared with %0 attribute">,
InGroup<UnusedResult>;
def warn_unused_result_msg : Warning<
"ignoring return value of function declared with %0 attribute: %1">,
InGroup<UnusedResult>;
def warn_unused_result_typedef_unsupported_spelling : Warning<
"'[[%select{nodiscard|gnu::warn_unused_result}0]]' attribute ignored when "
"applied to a typedef; consider using '__attribute__((warn_unused_result))' "
"or '[[clang::warn_unused_result]]' instead">, InGroup<IgnoredAttributes>;
def warn_unused_volatile : Warning<
"expression result unused; assign into a variable to force a volatile load">,
InGroup<DiagGroup<"unused-volatile-lvalue">>;
def ext_cxx14_attr : Extension<
"use of the %0 attribute is a C++14 extension">, InGroup<CXX14Attrs>;
def ext_cxx17_attr : Extension<
"use of the %0 attribute is a C++17 extension">, InGroup<CXX17Attrs>;
def ext_cxx20_attr : Extension<
"use of the %0 attribute is a C++20 extension">, InGroup<CXX20Attrs>;
def warn_unused_comparison : Warning<
"%select{equality|inequality|relational|three-way}0 comparison result unused">,
InGroup<UnusedComparison>;
def note_inequality_comparison_to_or_assign : Note<
"use '|=' to turn this inequality comparison into an or-assignment">;
def err_incomplete_type_used_in_type_trait_expr : Error<
"incomplete type %0 used in type trait expression">;
// C++20 constinit and require_constant_initialization attribute
def warn_cxx20_compat_constinit : Warning<
"'constinit' specifier is incompatible with C++ standards before C++20">,
InGroup<CXX20Compat>, DefaultIgnore;
def err_constinit_local_variable : Error<
"local variable cannot be declared 'constinit'">;
def err_require_constant_init_failed : Error<
"variable does not have a constant initializer">;
def note_declared_required_constant_init_here : Note<
"required by %select{'require_constant_initialization' attribute|"
"'constinit' specifier}0 here">;
def ext_constinit_missing : ExtWarn<
"'constinit' specifier missing on initializing declaration of %0">,
InGroup<DiagGroup<"missing-constinit">>;
def note_constinit_specified_here : Note<"variable declared constinit here">;
def err_constinit_added_too_late : Error<
"'constinit' specifier added after initialization of variable">;
def warn_require_const_init_added_too_late : Warning<
"'require_constant_initialization' attribute added after initialization "
"of variable">, InGroup<IgnoredAttributes>;
def note_constinit_missing_here : Note<
"add the "
"%select{'require_constant_initialization' attribute|'constinit' specifier}0 "
"to the initializing declaration here">;
def err_dimension_expr_not_constant_integer : Error<
"dimension expression does not evaluate to a constant unsigned int">;
def err_typecheck_cond_incompatible_operands_null : Error<
"non-pointer operand type %0 incompatible with %select{NULL|nullptr}1">;
def ext_empty_struct_union : Extension<
"empty %select{struct|union}0 is a GNU extension">, InGroup<GNUEmptyStruct>;
def ext_no_named_members_in_struct_union : Extension<
"%select{struct|union}0 without named members is a GNU extension">, InGroup<GNUEmptyStruct>;
def warn_zero_size_struct_union_compat : Warning<"%select{|empty }0"
"%select{struct|union}1 has size 0 in C, %select{size 1|non-zero size}2 in C++">,
InGroup<CXXCompat>, DefaultIgnore;
def warn_zero_size_struct_union_in_extern_c : Warning<"%select{|empty }0"
"%select{struct|union}1 has size 0 in C, %select{size 1|non-zero size}2 in C++">,
InGroup<ExternCCompat>;
def warn_cast_qual : Warning<"cast from %0 to %1 drops %select{const and "
"volatile qualifiers|const qualifier|volatile qualifier}2">,
InGroup<CastQual>, DefaultIgnore;
def warn_cast_qual2 : Warning<"cast from %0 to %1 must have all intermediate "
"pointers const qualified to be safe">, InGroup<CastQual>, DefaultIgnore;
def warn_redefine_extname_not_applied : Warning<
"#pragma redefine_extname is applicable to external C declarations only; "
"not applied to %select{function|variable}0 %1">,
InGroup<Pragmas>;
} // End of general sema category.
// inline asm.
let CategoryName = "Inline Assembly Issue" in {
def err_asm_pmf_through_constraint_not_permitted
: Error<"cannot pass a pointer-to-member through register-constrained "
"inline assembly parameter">;
def err_asm_invalid_lvalue_in_output : Error<"invalid lvalue in asm output">;
def err_asm_invalid_output_constraint : Error<
"invalid output constraint '%0' in asm">;
def err_asm_invalid_lvalue_in_input : Error<
"invalid lvalue in asm input for constraint '%0'">;
def err_asm_invalid_input_constraint : Error<
"invalid input constraint '%0' in asm">;
def err_asm_tying_incompatible_types : Error<
"unsupported inline asm: input with type "
"%diff{$ matching output with type $|}0,1">;
def err_asm_unexpected_constraint_alternatives : Error<
"asm constraint has an unexpected number of alternatives: %0 vs %1">;
def err_asm_incomplete_type : Error<"asm operand has incomplete type %0">;
def err_asm_unknown_register_name : Error<"unknown register name '%0' in asm">;
def err_asm_unwind_and_goto : Error<"unwind clobber can't be used with asm goto">;
def err_asm_invalid_global_var_reg : Error<"register '%0' unsuitable for "
"global register variables on this target">;
def err_asm_register_size_mismatch : Error<"size of register '%0' does not "
"match variable size">;
def err_asm_bad_register_type : Error<"bad type for named register variable">;
def err_asm_invalid_input_size : Error<
"invalid input size for constraint '%0'">;
def err_asm_invalid_output_size : Error<
"invalid output size for constraint '%0'">;
def err_invalid_asm_cast_lvalue : Error<
"invalid use of a cast in a inline asm context requiring an lvalue: "
"remove the cast or build with -fheinous-gnu-extensions">;
def err_invalid_asm_value_for_constraint
: Error <"value '%0' out of range for constraint '%1'">;
def err_asm_non_addr_value_in_memory_constraint : Error <
"reference to a %select{bit-field|vector element|global register variable}0"
" in asm %select{input|output}1 with a memory constraint '%2'">;
def err_asm_input_duplicate_match : Error<
"more than one input constraint matches the same output '%0'">;
def err_store_value_to_reg : Error<
"impossible constraint in asm: can't store value into a register">;
def warn_asm_label_on_auto_decl : Warning<
"ignored asm label '%0' on automatic variable">;
def warn_invalid_asm_cast_lvalue : Warning<
"invalid use of a cast in an inline asm context requiring an lvalue: "
"accepted due to -fheinous-gnu-extensions, but clang may remove support "
"for this in the future">;
def warn_asm_mismatched_size_modifier : Warning<
"value size does not match register size specified by the constraint "
"and modifier">,
InGroup<ASMOperandWidths>;
def note_asm_missing_constraint_modifier : Note<
"use constraint modifier \"%0\"">;
def note_asm_input_duplicate_first : Note<
"constraint '%0' is already present here">;
def error_duplicate_asm_operand_name : Error<
"duplicate use of asm operand name \"%0\"">;
def note_duplicate_asm_operand_name : Note<
"asm operand name \"%0\" first referenced here">;
}
def error_inoutput_conflict_with_clobber : Error<
"asm-specifier for input or output variable conflicts with asm"
" clobber list">;
let CategoryName = "Semantic Issue" in {
def err_invalid_conversion_between_matrixes : Error<
"conversion between matrix types%diff{ $ and $|}0,1 of different size is not allowed">;
def err_invalid_conversion_between_matrix_and_type : Error<
"conversion between matrix type %0 and incompatible type %1 is not allowed">;
def err_invalid_conversion_between_vectors : Error<
"invalid conversion between vector type%diff{ $ and $|}0,1 of different "
"size">;
def err_invalid_conversion_between_vector_and_integer : Error<
"invalid conversion between vector type %0 and integer type %1 "
"of different size">;
def err_opencl_function_pointer : Error<
"%select{pointers|references}0 to functions are not allowed">;
def err_opencl_taking_address_capture : Error<
"taking address of a capture is not allowed">;
def err_invalid_conversion_between_vector_and_scalar : Error<
"invalid conversion between vector type %0 and scalar type %1">;
// C++ member initializers.
def err_only_constructors_take_base_inits : Error<
"only constructors take base initializers">;
def err_multiple_mem_initialization : Error <
"multiple initializations given for non-static member %0">;
def err_multiple_mem_union_initialization : Error <
"initializing multiple members of union">;
def err_multiple_base_initialization : Error <
"multiple initializations given for base %0">;
def err_mem_init_not_member_or_class : Error<
"member initializer %0 does not name a non-static data member or base "
"class">;
def warn_initializer_out_of_order : Warning<
"%select{field|base class}0 %1 will be initialized after "
"%select{field|base}2 %3">,
InGroup<ReorderCtor>, DefaultIgnore;
def warn_some_initializers_out_of_order : Warning<
"initializer order does not match the declaration order">,
InGroup<ReorderCtor>, DefaultIgnore;
def note_initializer_out_of_order : Note<
"%select{field|base class}0 %1 will be initialized after "
"%select{field|base}2 %3">;
def warn_abstract_vbase_init_ignored : Warning<
"initializer for virtual base class %0 of abstract class %1 "
"will never be used">,
InGroup<DiagGroup<"abstract-vbase-init">>, DefaultIgnore;
def err_base_init_does_not_name_class : Error<
"constructor initializer %0 does not name a class">;
def err_base_init_direct_and_virtual : Error<
"base class initializer %0 names both a direct base class and an "
"inherited virtual base class">;
def err_not_direct_base_or_virtual : Error<
"type %0 is not a direct or virtual base of %1">;
def err_in_class_initializer_non_const : Error<
"non-const static data member must be initialized out of line">;
def err_in_class_initializer_volatile : Error<
"static const volatile data member must be initialized out of line">;
def err_in_class_initializer_bad_type : Error<
"static data member of type %0 must be initialized out of line">;
def ext_in_class_initializer_float_type : ExtWarn<
"in-class initializer for static data member of type %0 is a GNU extension">,
InGroup<GNUStaticFloatInit>;
def ext_in_class_initializer_float_type_cxx11 : ExtWarn<
"in-class initializer for static data member of type %0 requires "
"'constexpr' specifier">, InGroup<StaticFloatInit>, DefaultError;
def note_in_class_initializer_float_type_cxx11 : Note<"add 'constexpr'">;
def err_in_class_initializer_literal_type : Error<
"in-class initializer for static data member of type %0 requires "
"'constexpr' specifier">;
def err_in_class_initializer_non_constant : Error<
"in-class initializer for static data member is not a constant expression">;
def err_default_member_initializer_not_yet_parsed : Error<
"default member initializer for %1 needed within definition of enclosing "
"class %0 outside of member functions">;
def note_default_member_initializer_not_yet_parsed : Note<
"default member initializer declared here">;
def err_default_member_initializer_cycle
: Error<"default member initializer for %0 uses itself">;
def ext_in_class_initializer_non_constant : Extension<
"in-class initializer for static data member is not a constant expression; "
"folding it to a constant is a GNU extension">, InGroup<GNUFoldingConstant>;
def err_thread_dynamic_init : Error<
"initializer for thread-local variable must be a constant expression">;
def err_thread_nontrivial_dtor : Error<
"type of thread-local variable has non-trivial destruction">;
def note_use_thread_local : Note<
"use 'thread_local' to allow this">;
// C++ anonymous unions and GNU anonymous structs/unions
def ext_anonymous_union : Extension<
"anonymous unions are a C11 extension">, InGroup<C11>;
def ext_gnu_anonymous_struct : Extension<
"anonymous structs are a GNU extension">, InGroup<GNUAnonymousStruct>;
def ext_c11_anonymous_struct : Extension<
"anonymous structs are a C11 extension">, InGroup<C11>;
def err_anonymous_union_not_static : Error<
"anonymous unions at namespace or global scope must be declared 'static'">;
def err_anonymous_union_with_storage_spec : Error<
"anonymous union at class scope must not have a storage specifier">;
def err_anonymous_struct_not_member : Error<
"anonymous %select{structs|structs and classes}0 must be "
"%select{struct or union|class}0 members">;
def err_anonymous_record_member_redecl : Error<
"member of anonymous %select{struct|union}0 redeclares %1">;
def err_anonymous_record_with_type : Error<
"types cannot be declared in an anonymous %select{struct|union}0">;
def ext_anonymous_record_with_type : Extension<
"types declared in an anonymous %select{struct|union}0 are a Microsoft "
"extension">, InGroup<MicrosoftAnonTag>;
def ext_anonymous_record_with_anonymous_type : Extension<
"anonymous types declared in an anonymous %select{struct|union}0 "
"are an extension">, InGroup<DiagGroup<"nested-anon-types">>;
def err_anonymous_record_with_function : Error<
"functions cannot be declared in an anonymous %select{struct|union}0">;
def err_anonymous_record_with_static : Error<
"static members cannot be declared in an anonymous %select{struct|union}0">;
def err_anonymous_record_bad_member : Error<
"anonymous %select{struct|union}0 can only contain non-static data members">;
def err_anonymous_record_nonpublic_member : Error<
"anonymous %select{struct|union}0 cannot contain a "
"%select{private|protected}1 data member">;
def ext_ms_anonymous_record : ExtWarn<
"anonymous %select{structs|unions}0 are a Microsoft extension">,
InGroup<MicrosoftAnonTag>;
// C++ local classes
def err_reference_to_local_in_enclosing_context : Error<
"reference to local %select{variable|binding}1 %0 declared in enclosing "
"%select{%3|block literal|lambda expression|context}2">;
def err_capture_binding_openmp : Error<
"capturing a structured binding is not yet supported in OpenMP">;
def ext_capture_binding : ExtWarn<
"captured structured bindings are a C++20 extension">, InGroup<CXX20>;
def warn_cxx17_compat_capture_binding : Warning<
"captured structured bindings are incompatible with "
"C++ standards before C++20">,
InGroup<CXXPre20Compat>, DefaultIgnore;
def err_static_data_member_not_allowed_in_local_class : Error<
"static data member %0 not allowed in local %sub{select_tag_type_kind}2 %1">;
// C++ derived classes
def err_base_clause_on_union : Error<"unions cannot have base classes">;
def err_base_must_be_class : Error<"base specifier must name a class">;
def err_union_as_base_class : Error<"unions cannot be base classes">;
def err_circular_inheritance : Error<
"circular inheritance between %0 and %1">;
def err_base_class_has_flexible_array_member : Error<
"base class %0 has a flexible array member">;
def err_incomplete_base_class : Error<"base class has incomplete type">;
def err_duplicate_base_class : Error<
"base class %0 specified more than once as a direct base class">;
def warn_inaccessible_base_class : Warning<
"direct base %0 is inaccessible due to ambiguity:%1">,
InGroup<DiagGroup<"inaccessible-base">>;
// FIXME: better way to display derivation? Pass entire thing into diagclient?
def err_ambiguous_derived_to_base_conv : Error<
"ambiguous conversion from derived class %0 to base class %1:%2">;
def err_ambiguous_memptr_conv : Error<
"ambiguous conversion from pointer to member of %select{base|derived}0 "
"class %1 to pointer to member of %select{derived|base}0 class %2:%3">;
def ext_ms_ambiguous_direct_base : ExtWarn<
"accessing inaccessible direct base %0 of %1 is a Microsoft extension">,
InGroup<MicrosoftInaccessibleBase>;
def err_memptr_conv_via_virtual : Error<
"conversion from pointer to member of class %0 to pointer to member "
"of class %1 via virtual base %2 is not allowed">;
// C++ member name lookup
def err_ambiguous_member_multiple_subobjects : Error<
"non-static member %0 found in multiple base-class subobjects of type %1:%2">;
def err_ambiguous_member_multiple_subobject_types : Error<
"member %0 found in multiple base classes of different types">;
def note_ambiguous_member_found : Note<"member found by ambiguous name lookup">;
def note_ambiguous_member_type_found : Note<
"member type %0 found by ambiguous name lookup">;
def err_ambiguous_reference : Error<"reference to %0 is ambiguous">;
def note_ambiguous_candidate : Note<"candidate found by name lookup is %q0">;
def err_ambiguous_tag_hiding : Error<"a type named %0 is hidden by a "
"declaration in a different namespace">;
def note_hidden_tag : Note<"type declaration hidden">;
def note_hiding_object : Note<"declaration hides type">;
// C++ operator overloading
def err_operator_overload_needs_class_or_enum : Error<
"overloaded %0 must have at least one parameter of class "
"or enumeration type">;
def err_operator_overload_variadic : Error<"overloaded %0 cannot be variadic">;
def warn_cxx20_compat_operator_overload_static : Warning<
"declaring overloaded %0 as 'static' is incompatible with C++ standards "
"before C++2b">, InGroup<CXXPre2bCompat>, DefaultIgnore;
def ext_operator_overload_static : ExtWarn<
"declaring overloaded %0 as 'static' is a C++2b extension">, InGroup<CXX2b>;
def err_operator_overload_static : Error<
"overloaded %0 cannot be a static member function">;
def err_operator_overload_default_arg : Error<
"parameter of overloaded %0 cannot have a default argument">;
def ext_subscript_overload : Warning<
"overloaded %0 with %select{no|a defaulted|more than one}1 parameter is a "
"C++2b extension">, InGroup<CXXPre2bCompat>, DefaultIgnore;
def error_subscript_overload : Error<
"overloaded %0 cannot have %select{no|a defaulted|more than one}1 parameter before C++2b">;
def err_operator_overload_must_be : Error<
"overloaded %0 must be a %select{unary|binary|unary or binary}2 operator "
"(has %1 parameter%s1)">;
def err_operator_overload_must_be_member : Error<
"overloaded %0 must be a non-static member function">;
def err_operator_overload_post_incdec_must_be_int : Error<
"parameter of overloaded post-%select{increment|decrement}1 operator must "
"have type 'int' (not %0)">;
// C++ allocation and deallocation functions.
def err_operator_new_delete_declared_in_namespace : Error<
"%0 cannot be declared inside a namespace">;
def err_operator_new_delete_declared_static : Error<
"%0 cannot be declared static in global scope">;
def ext_operator_new_delete_declared_inline : ExtWarn<
"replacement function %0 cannot be declared 'inline'">,
InGroup<DiagGroup<"inline-new-delete">>;
def err_operator_new_delete_invalid_result_type : Error<
"%0 must return type %1">;
def err_operator_new_delete_dependent_result_type : Error<
"%0 cannot have a dependent return type; use %1 instead">;
def err_operator_new_delete_too_few_parameters : Error<
"%0 must have at least one parameter">;
def err_operator_new_delete_template_too_few_parameters : Error<
"%0 template must have at least two parameters">;
def warn_operator_new_returns_null : Warning<
"%0 should not return a null pointer unless it is declared 'throw()'"
"%select{| or 'noexcept'}1">, InGroup<OperatorNewReturnsNull>;
def err_operator_new_dependent_param_type : Error<
"%0 cannot take a dependent type as first parameter; "
"use size_t (%1) instead">;
def err_operator_new_param_type : Error<
"%0 takes type size_t (%1) as first parameter">;
def err_operator_new_default_arg: Error<
"parameter of %0 cannot have a default argument">;
def err_operator_delete_dependent_param_type : Error<
"%0 cannot take a dependent type as first parameter; use %1 instead">;
def err_operator_delete_param_type : Error<
"first parameter of %0 must have type %1">;
def err_destroying_operator_delete_not_usual : Error<
"destroying operator delete can have only an optional size and optional "
"alignment parameter">;
def note_implicit_delete_this_in_destructor_here : Note<
"while checking implicit 'delete this' for virtual destructor">;
def err_builtin_operator_new_delete_not_usual : Error<
"call to '%select{__builtin_operator_new|__builtin_operator_delete}0' "
"selects non-usual %select{allocation|deallocation}0 function">;
def note_non_usual_function_declared_here : Note<
"non-usual %0 declared here">;
// C++ literal operators
def err_literal_operator_outside_namespace : Error<
"literal operator %0 must be in a namespace or global scope">;
def err_literal_operator_id_outside_namespace : Error<
"non-namespace scope '%0' cannot have a literal operator member">;
def err_literal_operator_default_argument : Error<
"literal operator cannot have a default argument">;
def err_literal_operator_bad_param_count : Error<
"non-template literal operator must have one or two parameters">;
def err_literal_operator_invalid_param : Error<
"parameter of literal operator must have type 'unsigned long long', 'long double', 'char', 'wchar_t', 'char16_t', 'char32_t', or 'const char *'">;
def err_literal_operator_param : Error<
"invalid literal operator parameter type %0, did you mean %1?">;
def err_literal_operator_template_with_params : Error<
"literal operator template cannot have any parameters">;
def err_literal_operator_template : Error<
"template parameter list for literal operator must be either 'char...' or 'typename T, T...'">;
def err_literal_operator_extern_c : Error<
"literal operator must have C++ linkage">;
def ext_string_literal_operator_template : ExtWarn<
"string literal operator templates are a GNU extension">,
InGroup<GNUStringLiteralOperatorTemplate>;
def warn_user_literal_reserved : Warning<
"user-defined literal suffixes not starting with '_' are reserved"
"%select{; no literal will invoke this operator|}0">,
InGroup<UserDefinedLiterals>;
// C++ conversion functions
def err_conv_function_not_member : Error<
"conversion function must be a non-static member function">;
def err_conv_function_return_type : Error<
"conversion function cannot have a return type">;
def err_conv_function_with_params : Error<
"conversion function cannot have any parameters">;
def err_conv_function_variadic : Error<
"conversion function cannot be variadic">;
def err_conv_function_to_array : Error<
"conversion function cannot convert to an array type">;
def err_conv_function_to_function : Error<
"conversion function cannot convert to a function type">;
def err_conv_function_with_complex_decl : Error<
"cannot specify any part of a return type in the "
"declaration of a conversion function"
"%select{"
"; put the complete type after 'operator'|"
"; use a typedef to declare a conversion to %1|"
"; use an alias template to declare a conversion to %1|"
"}0">;
def err_conv_function_redeclared : Error<
"conversion function cannot be redeclared">;
def warn_conv_to_self_not_used : Warning<
"conversion function converting %0 to itself will never be used">,
InGroup<ClassConversion>;
def warn_conv_to_base_not_used : Warning<
"conversion function converting %0 to its base class %1 will never be used">,
InGroup<ClassConversion>;
def warn_conv_to_void_not_used : Warning<
"conversion function converting %0 to %1 will never be used">,
InGroup<ClassConversion>;
def warn_not_compound_assign : Warning<
"use of unary operator that may be intended as compound assignment (%0=)">;
// C++11 explicit conversion operators
def ext_explicit_conversion_functions : ExtWarn<
"explicit conversion functions are a C++11 extension">, InGroup<CXX11>;
def warn_cxx98_compat_explicit_conversion_functions : Warning<
"explicit conversion functions are incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
// C++11 defaulted functions
def err_defaulted_special_member_params : Error<
"an explicitly-defaulted %select{|copy |move }0constructor cannot "
"have default arguments">;
def err_defaulted_special_member_variadic : Error<
"an explicitly-defaulted %select{|copy |move }0constructor cannot "
"be variadic">;
def err_defaulted_special_member_return_type : Error<
"explicitly-defaulted %select{copy|move}0 assignment operator must "
"return %1">;
def err_defaulted_special_member_quals : Error<
"an explicitly-defaulted %select{copy|move}0 assignment operator may not "
"have 'const'%select{, 'constexpr'|}1 or 'volatile' qualifiers">;
def err_defaulted_special_member_volatile_param : Error<
"the parameter for an explicitly-defaulted %sub{select_special_member_kind}0 "
"may not be volatile">;
def err_defaulted_special_member_move_const_param : Error<
"the parameter for an explicitly-defaulted move "
"%select{constructor|assignment operator}0 may not be const">;
def err_defaulted_special_member_copy_const_param : Error<
"the parameter for this explicitly-defaulted copy "
"%select{constructor|assignment operator}0 is const, but a member or base "
"requires it to be non-const">;
def err_defaulted_copy_assign_not_ref : Error<
"the parameter for an explicitly-defaulted copy assignment operator must be an "
"lvalue reference type">;
def err_incorrect_defaulted_constexpr : Error<
"defaulted definition of %sub{select_special_member_kind}0 "
"is not constexpr">;
def err_incorrect_defaulted_consteval : Error<
"defaulted declaration of %sub{select_special_member_kind}0 "
"cannot be consteval because implicit definition is not constexpr">;
def warn_defaulted_method_deleted : Warning<
"explicitly defaulted %sub{select_special_member_kind}0 is implicitly "
"deleted">, InGroup<DefaultedFunctionDeleted>;
def note_replace_equals_default_to_delete : Note<
"replace 'default' with 'delete'">;
def err_out_of_line_default_deletes : Error<
"defaulting this %sub{select_special_member_kind}0 "
"would delete it after its first declaration">;
def note_deleted_type_mismatch : Note<
"function is implicitly deleted because its declared type does not match "
"the type of an implicit %sub{select_special_member_kind}0">;
def warn_cxx17_compat_defaulted_method_type_mismatch : Warning<
"explicitly defaulting this %sub{select_special_member_kind}0 with a type "
"different from the implicit type is incompatible with C++ standards before "
"C++20">, InGroup<CXXPre20Compat>, DefaultIgnore;
def warn_vbase_moved_multiple_times : Warning<
"defaulted move assignment operator of %0 will move assign virtual base "
"class %1 multiple times">, InGroup<DiagGroup<"multiple-move-vbase">>;
def note_vbase_moved_here : Note<
"%select{%1 is a virtual base class of base class %2 declared here|"
"virtual base class %1 declared here}0">;
// C++20 defaulted comparisons
// This corresponds to values of Sema::DefaultedComparisonKind.
def select_defaulted_comparison_kind : TextSubstitution<
"%select{<ERROR>|equality|three-way|equality|relational}0 comparison "
"operator">;
def ext_defaulted_comparison : ExtWarn<
"defaulted comparison operators are a C++20 extension">, InGroup<CXX20>;
def warn_cxx17_compat_defaulted_comparison : Warning<
"defaulted comparison operators are incompatible with C++ standards "
"before C++20">, InGroup<CXXPre20Compat>, DefaultIgnore;
def err_defaulted_comparison_template : Error<
"comparison operator template cannot be defaulted">;
def err_defaulted_comparison_num_args : Error<
"%select{non-member|member}0 %sub{select_defaulted_comparison_kind}1"
" comparison operator must have %select{2|1}0 parameters">;
def err_defaulted_comparison_param : Error<
"invalid parameter type for defaulted %sub{select_defaulted_comparison_kind}0"
"; found %1, expected %2%select{| or %4}3">;
def err_defaulted_comparison_param_unknown : Error<
"invalid parameter type for non-member defaulted"
" %sub{select_defaulted_comparison_kind}0"
"; found %1, expected class or reference to a constant class">;
def err_defaulted_comparison_param_mismatch : Error<
"parameters for defaulted %sub{select_defaulted_comparison_kind}0 "
"must have the same type%diff{ (found $ vs $)|}1,2">;
def err_defaulted_comparison_not_friend : Error<
"%sub{select_defaulted_comparison_kind}0 is not a friend of"
" %select{|incomplete class }1%2">;
def err_defaulted_comparison_non_const : Error<
"defaulted member %sub{select_defaulted_comparison_kind}0 must be "
"const-qualified">;
def err_defaulted_comparison_return_type_not_bool : Error<
"return type for defaulted %sub{select_defaulted_comparison_kind}0 "
"must be 'bool', not %1">;
def err_defaulted_comparison_deduced_return_type_not_auto : Error<
"deduced return type for defaulted %sub{select_defaulted_comparison_kind}0 "
"must be 'auto', not %1">;
def warn_defaulted_comparison_deleted : Warning<
"explicitly defaulted %sub{select_defaulted_comparison_kind}0 is implicitly "
"deleted">, InGroup<DefaultedFunctionDeleted>;
def err_non_first_default_compare_deletes : Error<
"defaulting %select{this %sub{select_defaulted_comparison_kind}1|"
"the corresponding implicit 'operator==' for this defaulted 'operator<=>'}0 "
"would delete it after its first declaration">;
def err_non_first_default_compare_in_class : Error<
"defaulting this %sub{select_defaulted_comparison_kind}0 "
"is not allowed because it was already declared outside the class">;
def note_defaulted_comparison_union : Note<
"defaulted %0 is implicitly deleted because "
"%2 is a %select{union-like class|union}1 with variant members">;
def note_defaulted_comparison_reference_member : Note<
"defaulted %0 is implicitly deleted because "
"class %1 has a reference member">;
def note_defaulted_comparison_ambiguous : Note<
"defaulted %0 is implicitly deleted because implied %select{|'==' |'<' }1"
"comparison %select{|for member %3 |for base class %3 }2is ambiguous">;
def note_defaulted_comparison_inaccessible : Note<
"defaulted %0 is implicitly deleted because it would invoke a "
"%select{private|protected}3 %4%select{ member of %6|"
" member of %6 to compare member %2| to compare base class %2}1">;
def note_defaulted_comparison_calls_deleted : Note<
"defaulted %0 is implicitly deleted because it would invoke a deleted "
"comparison function%select{| for member %2| for base class %2}1">;
def note_defaulted_comparison_no_viable_function : Note<
"defaulted %0 is implicitly deleted because there is no viable "
"%select{three-way comparison function|'operator=='}1 for "
"%select{|member |base class }2%3">;
def note_defaulted_comparison_no_viable_function_synthesized : Note<
"three-way comparison cannot be synthesized because there is no viable "
"function for %select{'=='|'<'}0 comparison">;
def note_defaulted_comparison_not_rewritten_callee : Note<
"defaulted %0 is implicitly deleted because this non-rewritten comparison "
"function would be the best match for the comparison">;
def note_defaulted_comparison_not_rewritten_conversion : Note<
"defaulted %0 is implicitly deleted because a builtin comparison function "
"using this conversion would be the best match for the comparison">;
def note_defaulted_comparison_cannot_deduce : Note<
"return type of defaulted 'operator<=>' cannot be deduced because "
"return type %2 of three-way comparison for %select{|member|base class}0 %1 "
"is not a standard comparison category type">;
def err_defaulted_comparison_cannot_deduce_undeduced_auto : Error<
"return type of defaulted 'operator<=>' cannot be deduced because "
"three-way comparison for %select{|member|base class}0 %1 "
"has a deduced return type and is not yet defined">;
def note_defaulted_comparison_cannot_deduce_undeduced_auto : Note<
"%select{|member|base class}0 %1 declared here">;
def note_defaulted_comparison_cannot_deduce_callee : Note<
"selected 'operator<=>' for %select{|member|base class}0 %1 declared here">;
def err_incorrect_defaulted_comparison_constexpr : Error<
"defaulted definition of %select{%sub{select_defaulted_comparison_kind}1|"
"three-way comparison operator}0 "
"cannot be declared %select{constexpr|consteval}2 because "
"%select{it|the corresponding implicit 'operator=='}0 "
"invokes a non-constexpr comparison function">;
def note_defaulted_comparison_not_constexpr : Note<
"non-constexpr comparison function would be used to compare "
"%select{|member %1|base class %1}0">;
def note_defaulted_comparison_not_constexpr_here : Note<
"non-constexpr comparison function declared here">;
def note_in_declaration_of_implicit_equality_comparison : Note<
"while declaring the corresponding implicit 'operator==' "
"for this defaulted 'operator<=>'">;
def ext_implicit_exception_spec_mismatch : ExtWarn<
"function previously declared with an %select{explicit|implicit}0 exception "
"specification redeclared with an %select{implicit|explicit}0 exception "
"specification">, InGroup<DiagGroup<"implicit-exception-spec-mismatch">>;
def warn_ptr_arith_precedes_bounds : Warning<
"the pointer decremented by %0 refers before the beginning of the array">,
InGroup<ArrayBoundsPointerArithmetic>, DefaultIgnore;
def warn_ptr_arith_exceeds_bounds : Warning<
"the pointer incremented by %0 refers past the end of the array (that has type %1)">,
InGroup<ArrayBoundsPointerArithmetic>, DefaultIgnore;
def warn_array_index_precedes_bounds : Warning<
"array index %0 is before the beginning of the array">,
InGroup<ArrayBounds>;
def warn_array_index_exceeds_bounds : Warning<
"array index %0 is past the end of the array (that has type %1%select{|, cast to %3}2)">,
InGroup<ArrayBounds>;
def warn_ptr_arith_exceeds_max_addressable_bounds : Warning<
"the pointer incremented by %0 refers past the last possible element for an array in %1-bit "
"address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
InGroup<ArrayBounds>;
def warn_array_index_exceeds_max_addressable_bounds : Warning<
"array index %0 refers past the last possible element for an array in %1-bit "
"address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
InGroup<ArrayBounds>;
def note_array_declared_here : Note<
"array %0 declared here">;
def warn_inconsistent_array_form : Warning<
"argument %0 of type %1 with mismatched bound">,
InGroup<ArrayParameter>, DefaultIgnore;
def note_previous_declaration_as : Note<
"previously declared as %0 here">;
def warn_printf_insufficient_data_args : Warning<
"more '%%' conversions than data arguments">, InGroup<FormatInsufficientArgs>;
def warn_printf_data_arg_not_used : Warning<
"data argument not used by format string">, InGroup<FormatExtraArgs>;
def warn_format_invalid_conversion : Warning<
"invalid conversion specifier '%0'">, InGroup<FormatInvalidSpecifier>;
def warn_printf_incomplete_specifier : Warning<
"incomplete format specifier">, InGroup<Format>;
def warn_missing_format_string : Warning<
"format string missing">, InGroup<Format>;
def warn_scanf_nonzero_width : Warning<
"zero field width in scanf format string is unused">,
InGroup<Format>;
def warn_format_conversion_argument_type_mismatch : Warning<
"format specifies type %0 but the argument has "
"%select{type|underlying type}2 %1">,
InGroup<Format>;
def warn_format_conversion_argument_type_mismatch_pedantic : Extension<
warn_format_conversion_argument_type_mismatch.Summary>,
InGroup<FormatPedantic>;
def warn_format_conversion_argument_type_mismatch_confusion : Warning<
warn_format_conversion_argument_type_mismatch.Summary>,
InGroup<FormatTypeConfusion>, DefaultIgnore;
def warn_format_argument_needs_cast : Warning<
"%select{values of type|enum values with underlying type}2 '%0' should not "
"be used as format arguments; add an explicit cast to %1 instead">,
InGroup<Format>;
def warn_format_argument_needs_cast_pedantic : Warning<
warn_format_argument_needs_cast.Summary>,
InGroup<FormatPedantic>, DefaultIgnore;
def warn_printf_positional_arg_exceeds_data_args : Warning <
"data argument position '%0' exceeds the number of data arguments (%1)">,
InGroup<Format>;
def warn_format_zero_positional_specifier : Warning<
"position arguments in format strings start counting at 1 (not 0)">,
InGroup<Format>;
def warn_format_invalid_positional_specifier : Warning<
"invalid position specified for %select{field width|field precision}0">,
InGroup<Format>;
def warn_format_mix_positional_nonpositional_args : Warning<
"cannot mix positional and non-positional arguments in format string">,
InGroup<Format>;
def warn_static_array_too_small : Warning<
"array argument is too small; %select{contains %0 elements|is of size %0}2,"
" callee requires at least %1">,
InGroup<ArrayBounds>;
def note_callee_static_array : Note<
"callee declares array parameter as static here">;
def warn_empty_format_string : Warning<
"format string is empty">, InGroup<FormatZeroLength>;
def warn_format_string_is_wide_literal : Warning<
"format string should not be a wide string">, InGroup<Format>;
def warn_printf_format_string_contains_null_char : Warning<
"format string contains '\\0' within the string body">, InGroup<Format>;
def warn_printf_format_string_not_null_terminated : Warning<
"format string is not null-terminated">, InGroup<Format>;
def warn_printf_asterisk_missing_arg : Warning<
"'%select{*|.*}0' specified field %select{width|precision}0 is missing a matching 'int' argument">,
InGroup<Format>;
def warn_printf_asterisk_wrong_type : Warning<
"field %select{width|precision}0 should have type %1, but argument has type %2">,
InGroup<Format>;
def warn_printf_nonsensical_optional_amount: Warning<
"%select{field width|precision}0 used with '%1' conversion specifier, resulting in undefined behavior">,
InGroup<Format>;
def warn_printf_nonsensical_flag: Warning<
"flag '%0' results in undefined behavior with '%1' conversion specifier">,
InGroup<Format>;
def warn_format_nonsensical_length: Warning<
"length modifier '%0' results in undefined behavior or no effect with '%1' conversion specifier">,
InGroup<Format>;
def warn_format_non_standard_positional_arg: Warning<
"positional arguments are not supported by ISO C">, InGroup<FormatNonStandard>, DefaultIgnore;
def warn_format_non_standard: Warning<
"'%0' %select{length modifier|conversion specifier}1 is not supported by ISO C">,
InGroup<FormatNonStandard>, DefaultIgnore;
def warn_format_non_standard_conversion_spec: Warning<
"using length modifier '%0' with conversion specifier '%1' is not supported by ISO C">,
InGroup<FormatNonStandard>, DefaultIgnore;
def err_invalid_mask_type_size : Error<
"mask type size must be between 1-byte and 8-bytes">;
def warn_format_invalid_annotation : Warning<
"using '%0' format specifier annotation outside of os_log()/os_trace()">,
InGroup<Format>;
def warn_format_P_no_precision : Warning<
"using '%%P' format specifier without precision">,
InGroup<Format>;
def warn_printf_ignored_flag: Warning<
"flag '%0' is ignored when flag '%1' is present">,
InGroup<Format>;
def warn_printf_empty_objc_flag: Warning<
"missing object format flag">,
InGroup<Format>;
def warn_printf_ObjCflags_without_ObjCConversion: Warning<
"object format flags cannot be used with '%0' conversion specifier">,
InGroup<Format>;
def warn_printf_invalid_objc_flag: Warning<
"'%0' is not a valid object format flag">,
InGroup<Format>;
def warn_printf_narg_not_supported : Warning<
"'%%n' specifier not supported on this platform">,
InGroup<Format>;
def warn_scanf_scanlist_incomplete : Warning<
"no closing ']' for '%%[' in scanf format string">,
InGroup<Format>;
def warn_format_bool_as_character : Warning<
"using '%0' format specifier, but argument has boolean value">,
InGroup<Format>;
def note_format_string_defined : Note<"format string is defined here">;
def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
def note_format_security_fixit: Note<
"treat the string as an argument to avoid this">;
def warn_null_arg : Warning<
"null passed to a callee that requires a non-null argument">,
InGroup<NonNull>;
def warn_null_ret : Warning<
"null returned from %select{function|method}0 that requires a non-null return value">,
InGroup<NonNull>;
def err_lifetimebound_no_object_param : Error<
"'lifetimebound' attribute cannot be applied; %select{static |non-}0member "
"function has no implicit object parameter">;
def err_lifetimebound_ctor_dtor : Error<
"'lifetimebound' attribute cannot be applied to a "
"%select{constructor|destructor}0">;
// CHECK: returning address/reference of stack memory
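// Illustrative sketch (not from the original file) of code the
// -Wreturn-stack-address diagnostics below are meant to catch:
//   int *f() { int local = 0; return &local; }  // address of stack memory
//   int &g() { int local = 0; return local; }   // reference to stack memory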
def warn_ret_stack_addr_ref : Warning<
"%select{address of|reference to}0 stack memory associated with "
"%select{local variable|parameter}2 %1 returned">,
InGroup<ReturnStackAddress>;
def warn_ret_local_temp_addr_ref : Warning<
"returning %select{address of|reference to}0 local temporary object">,
InGroup<ReturnStackAddress>;
def warn_ret_addr_label : Warning<
"returning address of label, which is local">,
InGroup<ReturnStackAddress>;
def err_ret_local_block : Error<
"returning block that lives on the local stack">;
def note_local_var_initializer : Note<
"%select{via initialization of|binding reference}0 variable "
"%select{%2 |}1here">;
def note_lambda_capture_initializer : Note<
"%select{implicitly |}2captured%select{| by reference}3"
"%select{%select{ due to use|}2 here|"
" via initialization of lambda capture %0}1">;
def note_init_with_default_member_initalizer : Note<
"initializing field %0 with default member initializer">;
// Check for initializing a member variable with the address or a reference to
// a constructor parameter.
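// Illustrative sketch (not from the original file): both -Wdangling-field
// warnings below would fire on a constructor such as
//   struct S {
//     int &ref;
//     int *ptr;
//     S(int p) : ref(p), ptr(&p) {}  // p is destroyed when the ctor returns
//   };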
def warn_bind_ref_member_to_parameter : Warning<
"binding reference member %0 to stack allocated "
"%select{variable|parameter}2 %1">, InGroup<DanglingField>;
def warn_init_ptr_member_to_parameter_addr : Warning<
"initializing pointer member %0 with the stack address of "
"%select{variable|parameter}2 %1">, InGroup<DanglingField>;
def note_ref_or_ptr_member_declared_here : Note<
"%select{reference|pointer}0 member declared here">;
def err_dangling_member : Error<
"%select{reference|backing array for 'std::initializer_list'}2 "
"%select{|subobject of }1member %0 "
"%select{binds to|is}2 a temporary object "
"whose lifetime would be shorter than the lifetime of "
"the constructed object">;
def warn_dangling_member : Warning<
"%select{reference|backing array for 'std::initializer_list'}2 "
"%select{|subobject of }1member %0 "
"%select{binds to|is}2 a temporary object "
"whose lifetime is shorter than the lifetime of the constructed object">,
InGroup<DanglingField>;
def warn_dangling_lifetime_pointer_member : Warning<
"initializing pointer member %0 to point to a temporary object "
"whose lifetime is shorter than the lifetime of the constructed object">,
InGroup<DanglingGsl>;
def note_lifetime_extending_member_declared_here : Note<
"%select{%select{reference|'std::initializer_list'}0 member|"
"member with %select{reference|'std::initializer_list'}0 subobject}1 "
"declared here">;
def warn_dangling_variable : Warning<
"%select{temporary %select{whose address is used as value of|"
"%select{|implicitly }2bound to}4 "
"%select{%select{|reference }4member of local variable|"
"local %select{variable|reference}4}1|"
"array backing "
"%select{initializer list subobject of local variable|"
"local initializer list}1}0 "
"%select{%3 |}2will be destroyed at the end of the full-expression">,
InGroup<Dangling>;
def warn_new_dangling_reference : Warning<
"temporary bound to reference member of allocated object "
"will be destroyed at the end of the full-expression">,
InGroup<DanglingField>;
def warn_dangling_lifetime_pointer : Warning<
"object backing the pointer "
"will be destroyed at the end of the full-expression">,
InGroup<DanglingGsl>;
def warn_new_dangling_initializer_list : Warning<
"array backing "
"%select{initializer list subobject of the allocated object|"
"the allocated initializer list}0 "
"will be destroyed at the end of the full-expression">,
InGroup<DanglingInitializerList>;
def warn_unsupported_lifetime_extension : Warning<
"sorry, lifetime extension of "
"%select{temporary|backing array of initializer list}0 created "
"by aggregate initialization using default member initializer "
"is not supported; lifetime of %select{temporary|backing array}0 "
"will end at the end of the full-expression">, InGroup<Dangling>;
// For non-floating point, expressions of the form x == x or x != x
// should result in a warning, since these always evaluate to a constant.
// Array comparisons have similar warnings
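// For example (illustrative, not from the original file), with
// -Wtautological-compare enabled:
//   if (x == x) { }   // self-comparison always evaluates to true
//   int a[4], b[4];
//   if (a == b) { }   // array comparison always evaluates to false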
def warn_comparison_always : Warning<
"%select{self-|array }0comparison always evaluates to "
"%select{a constant|true|false|'std::strong_ordering::equal'}1">,
InGroup<TautologicalCompare>;
def warn_comparison_bitwise_always : Warning<
"bitwise comparison always evaluates to %select{false|true}0">,
InGroup<TautologicalBitwiseCompare>, DefaultIgnore;
def warn_comparison_bitwise_or : Warning<
"bitwise or with non-zero value always evaluates to true">,
InGroup<TautologicalBitwiseCompare>, DefaultIgnore;
def warn_tautological_overlap_comparison : Warning<
"overlapping comparisons always evaluate to %select{false|true}0">,
InGroup<TautologicalOverlapCompare>, DefaultIgnore;
def warn_depr_array_comparison : Warning<
"comparison between two arrays is deprecated; "
"to compare array addresses, use unary '+' to decay operands to pointers">,
InGroup<DeprecatedArrayCompare>;
def warn_stringcompare : Warning<
"result of comparison against %select{a string literal|@encode}0 is "
"unspecified (use an explicit string comparison function instead)">,
InGroup<StringCompare>;
def warn_identity_field_assign : Warning<
"assigning %select{field|instance variable}0 to itself">,
InGroup<SelfAssignmentField>;
// Type safety attributes
def err_type_tag_for_datatype_not_ice : Error<
"'type_tag_for_datatype' attribute requires the initializer to be "
"an %select{integer|integral}0 constant expression">;
def err_type_tag_for_datatype_too_large : Error<
"'type_tag_for_datatype' attribute requires the initializer to be "
"an %select{integer|integral}0 constant expression "
"that can be represented by a 64 bit integer">;
def err_tag_index_out_of_range : Error<
"%select{type tag|argument}0 index %1 is greater than the number of arguments specified">;
def warn_type_tag_for_datatype_wrong_kind : Warning<
"this type tag was not designed to be used with this function">,
InGroup<TypeSafety>;
def warn_type_safety_type_mismatch : Warning<
"argument type %0 doesn't match specified %1 type tag "
"%select{that requires %3|}2">, InGroup<TypeSafety>;
def warn_type_safety_null_pointer_required : Warning<
"specified %0 type tag requires a null pointer">, InGroup<TypeSafety>;
// Generic selections.
def err_assoc_type_incomplete : Error<
"type %0 in generic association incomplete">;
def err_assoc_type_nonobject : Error<
"type %0 in generic association not an object type">;
def err_assoc_type_variably_modified : Error<
"type %0 in generic association is a variably modified type">;
def err_assoc_compatible_types : Error<
"type %0 in generic association compatible with previously specified type %1">;
def note_compat_assoc : Note<
"compatible type %0 specified here">;
def err_generic_sel_no_match : Error<
"controlling expression type %0 not compatible with any generic association type">;
def err_generic_sel_multi_match : Error<
"controlling expression type %0 compatible with %1 generic association types">;
// Blocks
def err_blocks_disable : Error<"blocks support disabled - compile with -fblocks"
" or %select{pick a deployment target that supports them|for OpenCL C 2.0"
" or OpenCL C 3.0 with __opencl_c_device_enqueue feature}0">;
def err_block_returning_array_function : Error<
"block cannot return %select{array|function}0 type %1">;
// Builtin annotation
def err_builtin_annotation_first_arg : Error<
"first argument to __builtin_annotation must be an integer">;
def err_builtin_annotation_second_arg : Error<
"second argument to __builtin_annotation must be a non-wide string constant">;
def err_msvc_annotation_wide_str : Error<
"arguments to __annotation must be wide string constants">;
// CFString checking
def err_cfstring_literal_not_string_constant : Error<
"CFString literal is not a string constant">;
def warn_cfstring_truncated : Warning<
"input conversion stopped due to an input byte that does not "
"belong to the input codeset UTF-8">,
InGroup<DiagGroup<"CFString-literal">>;
// os_log checking
// TODO: separate diagnostic for os_trace()
def err_os_log_format_not_string_constant : Error<
"os_log() format argument is not a string constant">;
def err_os_log_argument_too_big : Error<
"os_log() argument %0 is too big (%1 bytes, max %2)">;
def warn_os_log_format_narg : Error<
"os_log() '%%n' format specifier is not allowed">, DefaultError;
// Statements.
def err_continue_not_in_loop : Error<
"'continue' statement not in loop statement">;
def err_break_not_in_loop_or_switch : Error<
"'break' statement not in loop or switch statement">;
def warn_loop_ctrl_binds_to_inner : Warning<
"'%0' is bound to current loop, GCC binds it to the enclosing loop">,
InGroup<GccCompat>;
def warn_break_binds_to_switch : Warning<
"'break' is bound to loop, GCC binds it to switch">,
InGroup<GccCompat>;
def err_default_not_in_switch : Error<
"'default' statement not in switch statement">;
def err_case_not_in_switch : Error<"'case' statement not in switch statement">;
def warn_bool_switch_condition : Warning<
"switch condition has boolean value">, InGroup<SwitchBool>;
def warn_case_value_overflow : Warning<
"overflow converting case value to switch condition type (%0 to %1)">,
InGroup<Switch>;
def err_duplicate_case : Error<"duplicate case value '%0'">;
def err_duplicate_case_differing_expr : Error<
"duplicate case value: '%0' and '%1' both equal '%2'">;
def warn_case_empty_range : Warning<"empty case range specified">;
def warn_missing_case_for_condition :
Warning<"no case matching constant switch condition '%0'">;
def warn_def_missing_case : Warning<"%plural{"
"1:enumeration value %1 not explicitly handled in switch|"
"2:enumeration values %1 and %2 not explicitly handled in switch|"
"3:enumeration values %1, %2, and %3 not explicitly handled in switch|"
":%0 enumeration values not explicitly handled in switch: %1, %2, %3...}0">,
InGroup<SwitchEnum>, DefaultIgnore;
def warn_missing_case : Warning<"%plural{"
"1:enumeration value %1 not handled in switch|"
"2:enumeration values %1 and %2 not handled in switch|"
"3:enumeration values %1, %2, and %3 not handled in switch|"
":%0 enumeration values not handled in switch: %1, %2, %3...}0">,
InGroup<Switch>;
def warn_unannotated_fallthrough : Warning<
"unannotated fall-through between switch labels">,
InGroup<ImplicitFallthrough>, DefaultIgnore;
def warn_unannotated_fallthrough_per_function : Warning<
"unannotated fall-through between switch labels in partly-annotated "
"function">, InGroup<ImplicitFallthroughPerFunction>, DefaultIgnore;
def note_insert_fallthrough_fixit : Note<
"insert '%0;' to silence this warning">;
def note_insert_break_fixit : Note<
"insert 'break;' to avoid fall-through">;
def err_fallthrough_attr_wrong_target : Error<
"%0 attribute is only allowed on empty statements">;
def note_fallthrough_insert_semi_fixit : Note<"did you forget ';'?">;
def err_fallthrough_attr_outside_switch : Error<
"fallthrough annotation is outside switch statement">;
def err_fallthrough_attr_invalid_placement : Error<
"fallthrough annotation does not directly precede switch label">;
def warn_unreachable_default : Warning<
"default label in switch which covers all enumeration values">,
InGroup<CoveredSwitchDefault>, DefaultIgnore;
def warn_not_in_enum : Warning<"case value not in enumerated type %0">,
InGroup<Switch>;
def warn_not_in_enum_assignment : Warning<"integer constant not in range "
"of enumerated type %0">, InGroup<DiagGroup<"assign-enum">>, DefaultIgnore;
def err_typecheck_statement_requires_scalar : Error<
"statement requires expression of scalar type (%0 invalid)">;
def err_typecheck_statement_requires_integer : Error<
"statement requires expression of integer type (%0 invalid)">;
def err_multiple_default_labels_defined : Error<
"multiple default labels in one switch">;
def err_switch_multiple_conversions : Error<
"multiple conversions from switch condition type %0 to an integral or "
"enumeration type">;
def note_switch_conversion : Note<
"conversion to %select{integral|enumeration}0 type %1">;
def err_switch_explicit_conversion : Error<
"switch condition type %0 requires explicit conversion to %1">;
def err_switch_incomplete_class_type : Error<
"switch condition has incomplete class type %0">;
def warn_empty_if_body : Warning<
"if statement has empty body">, InGroup<EmptyBody>;
def warn_empty_for_body : Warning<
"for loop has empty body">, InGroup<EmptyBody>;
def warn_empty_range_based_for_body : Warning<
"range-based for loop has empty body">, InGroup<EmptyBody>;
def warn_empty_while_body : Warning<
"while loop has empty body">, InGroup<EmptyBody>;
def warn_empty_switch_body : Warning<
"switch statement has empty body">, InGroup<EmptyBody>;
def note_empty_body_on_separate_line : Note<
"put the semicolon on a separate line to silence this warning">;
def err_va_start_captured_stmt : Error<
"'va_start' cannot be used in a captured statement">;
def err_va_start_outside_function : Error<
"'va_start' cannot be used outside a function">;
def err_va_start_fixed_function : Error<
"'va_start' used in function with fixed args">;
def err_va_start_used_in_wrong_abi_function : Error<
"'va_start' used in %select{System V|Win64}0 ABI function">;
def err_ms_va_start_used_in_sysv_function : Error<
"'__builtin_ms_va_start' used in System V ABI function">;
def warn_second_arg_of_va_start_not_last_named_param : Warning<
"second argument to 'va_start' is not the last named parameter">,
InGroup<Varargs>;
def warn_c17_compat_ellipsis_only_parameter : Warning<
"'...' as the only parameter of a function is incompatible with C standards "
"before C2x">, DefaultIgnore, InGroup<CPre2xCompat>;
def warn_va_start_type_is_undefined : Warning<
"passing %select{an object that undergoes default argument promotion|"
"an object of reference type|a parameter declared with the 'register' "
"keyword}0 to 'va_start' has undefined behavior">, InGroup<Varargs>;
def err_first_argument_to_va_arg_not_of_type_va_list : Error<
"first argument to 'va_arg' is of type %0 and not 'va_list'">;
def err_second_parameter_to_va_arg_incomplete: Error<
"second argument to 'va_arg' is of incomplete type %0">;
def err_second_parameter_to_va_arg_abstract: Error<
"second argument to 'va_arg' is of abstract type %0">;
def warn_second_parameter_to_va_arg_not_pod : Warning<
"second argument to 'va_arg' is of non-POD type %0">,
InGroup<NonPODVarargs>, DefaultError;
def warn_second_parameter_to_va_arg_ownership_qualified : Warning<
"second argument to 'va_arg' is of ARC ownership-qualified type %0">,
InGroup<NonPODVarargs>, DefaultError;
def warn_second_parameter_to_va_arg_never_compatible : Warning<
"second argument to 'va_arg' is of promotable type %0; this va_arg has "
"undefined behavior because arguments will be promoted to %1">, InGroup<Varargs>;
def warn_return_missing_expr : Warning<
"non-void %select{function|method}1 %0 should return a value">, DefaultError,
InGroup<ReturnType>;
def ext_return_missing_expr : ExtWarn<
"non-void %select{function|method}1 %0 should return a value">, DefaultError,
InGroup<ReturnType>;
def ext_return_has_expr : ExtWarn<
"%select{void function|void method|constructor|destructor}1 %0 "
"should not return a value">,
DefaultError, InGroup<ReturnType>;
def ext_return_has_void_expr : Extension<
"void %select{function|method|block}1 %0 should not return void expression">;
def err_return_init_list : Error<
"%select{void function|void method|constructor|destructor}1 %0 "
"must not return a value">;
def err_ctor_dtor_returns_void : Error<
"%select{constructor|destructor}1 %0 must not return void expression">;
def warn_noreturn_function_has_return_expr : Warning<
"function %0 declared 'noreturn' should not return">,
InGroup<InvalidNoreturn>;
def warn_falloff_noreturn_function : Warning<
"function declared 'noreturn' should not return">,
InGroup<InvalidNoreturn>;
def err_noreturn_block_has_return_expr : Error<
"block declared 'noreturn' should not return">;
def err_carries_dependency_missing_on_first_decl : Error<
"%select{function|parameter}0 declared '[[carries_dependency]]' "
"after its first declaration">;
def note_carries_dependency_missing_first_decl : Note<
"declaration missing '[[carries_dependency]]' attribute is here">;
def err_carries_dependency_param_not_function_decl : Error<
"'[[carries_dependency]]' attribute only allowed on parameter in a function "
"declaration or lambda">;
def err_block_on_nonlocal : Error<
"__block attribute not allowed, only allowed on local variables">;
def err_block_on_vm : Error<
"__block attribute not allowed on declaration with a variably modified type">;
def err_sizeless_nonlocal : Error<
"non-local variable with sizeless type %0">;
def err_vec_builtin_non_vector : Error<
"first two arguments to %0 must be vectors">;
def err_vec_builtin_incompatible_vector : Error<
"first two arguments to %0 must have the same type">;
def err_vsx_builtin_nonconstant_argument : Error<
"argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">;
def err_shufflevector_nonconstant_argument : Error<
"index for __builtin_shufflevector must be a constant integer">;
def err_shufflevector_argument_too_large : Error<
"index for __builtin_shufflevector must be less than the total number "
"of vector elements">;
def err_convertvector_non_vector : Error<
"first argument to __builtin_convertvector must be a vector">;
def err_convertvector_non_vector_type : Error<
"second argument to __builtin_convertvector must be a vector type">;
def err_convertvector_incompatible_vector : Error<
"first two arguments to __builtin_convertvector must have the same number of elements">;
def err_first_argument_to_cwsc_not_call : Error<
"first argument to __builtin_call_with_static_chain must be a non-member call expression">;
def err_first_argument_to_cwsc_block_call : Error<
"first argument to __builtin_call_with_static_chain must not be a block call">;
def err_first_argument_to_cwsc_builtin_call : Error<
"first argument to __builtin_call_with_static_chain must not be a builtin call">;
def err_first_argument_to_cwsc_pdtor_call : Error<
"first argument to __builtin_call_with_static_chain must not be a pseudo-destructor call">;
def err_second_argument_to_cwsc_not_pointer : Error<
"second argument to __builtin_call_with_static_chain must be of pointer type">;
def err_vector_incorrect_num_initializers : Error<
"%select{too many|too few}0 elements in vector initialization (expected %1 elements, have %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;
def err_invalid_neon_type_code : Error<
"incompatible constant for this __builtin_neon function">;
def err_argument_invalid_range : Error<
"argument value %0 is outside the valid range [%1, %2]">;
def warn_argument_invalid_range : Warning<
"argument value %0 is outside the valid range [%1, %2]">, DefaultError,
InGroup<DiagGroup<"argument-outside-range">>;
def warn_argument_undefined_behaviour : Warning<
"argument value %0 will result in undefined behaviour">,
InGroup<DiagGroup<"argument-undefined-behaviour">>;
def err_argument_not_multiple : Error<
"argument should be a multiple of %0">;
def err_argument_not_power_of_2 : Error<
"argument should be a power of 2">;
def err_argument_not_shifted_byte : Error<
"argument should be an 8-bit value shifted by a multiple of 8 bits">;
def err_argument_not_shifted_byte_or_xxff : Error<
"argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF">;
def err_argument_not_contiguous_bit_field : Error<
"argument %0 value should represent a contiguous bit field">;
def err_rotation_argument_to_cadd : Error<
"argument should be the value 90 or 270">;
def err_rotation_argument_to_cmla : Error<
"argument should be the value 0, 90, 180 or 270">;
def warn_neon_vector_initializer_non_portable : Warning<
"vector initializers are not compatible with NEON intrinsics in big endian "
"mode">, InGroup<DiagGroup<"nonportable-vector-initialization">>;
def note_neon_vector_initializer_non_portable : Note<
"consider using vld1_%0%1() to initialize a vector from memory, or "
"vcreate_%0%1() to initialize from an integer constant">;
def note_neon_vector_initializer_non_portable_q : Note<
"consider using vld1q_%0%1() to initialize a vector from memory, or "
"vcombine_%0%1(vcreate_%0%1(), vcreate_%0%1()) to initialize from integer "
"constants">;
def err_systemz_invalid_tabort_code : Error<
"invalid transaction abort code">;
def err_64_bit_builtin_32_bit_tgt : Error<
"this builtin is only available on 64-bit targets">;
def err_32_bit_builtin_64_bit_tgt : Error<
"this builtin is only available on 32-bit targets">;
def err_builtin_x64_aarch64_only : Error<
"this builtin is only available on x86-64 and aarch64 targets">;
def err_mips_builtin_requires_dsp : Error<
"this builtin requires 'dsp' ASE, please use -mdsp">;
def err_mips_builtin_requires_dspr2 : Error<
"this builtin requires 'dsp r2' ASE, please use -mdspr2">;
def err_mips_builtin_requires_msa : Error<
"this builtin requires 'msa' ASE, please use -mmsa">;
def err_ppc_builtin_only_on_arch : Error<
"this builtin is only valid on POWER%0 or later CPUs">;
def err_ppc_builtin_requires_vsx : Error<
"this builtin requires VSX to be enabled">;
def err_ppc_builtin_requires_htm : Error<
"this builtin requires HTM to be enabled">;
def err_ppc_builtin_requires_abi : Error<
"this builtin requires ABI -mabi=%0">;
def err_ppc_invalid_use_mma_type : Error<
"invalid use of PPC MMA type">;
def err_ppc_invalid_test_data_class_type : Error<
"expected a 'float', 'double' or '__float128' for the first argument">;
def err_x86_builtin_invalid_rounding : Error<
"invalid rounding argument">;
def err_x86_builtin_invalid_scale : Error<
"scale argument must be 1, 2, 4, or 8">;
def err_x86_builtin_tile_arg_duplicate : Error<
"tile arguments must refer to different tiles">;
def err_builtin_target_unsupported : Error<
"builtin is not supported on this target">;
def err_builtin_longjmp_unsupported : Error<
"__builtin_longjmp is not supported for the current target">;
def err_builtin_setjmp_unsupported : Error<
"__builtin_setjmp is not supported for the current target">;
def err_builtin_longjmp_invalid_val : Error<
"argument to __builtin_longjmp must be a constant 1">;
def err_builtin_requires_language : Error<"'%0' is only available in %1">;
def err_constant_integer_arg_type : Error<
"argument to %0 must be a constant integer">;
def ext_mixed_decls_code : Extension<
"mixing declarations and code is a C99 extension">,
InGroup<DeclarationAfterStatement>;
def warn_mixed_decls_code : Warning<
"mixing declarations and code is incompatible with standards before C99">,
InGroup<DeclarationAfterStatement>, DefaultIgnore;
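// A minimal C sketch: compiled as C89 with -Wdeclaration-after-statement (or
// -pedantic), the declaration after a statement draws the extension warning.
//
//   int scale(int x) {
//     x *= 2;
//     int doubled = x;  // warning: mixing declarations and code is a C99 extension
//     return doubled;
//   }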
def err_non_local_variable_decl_in_for : Error<
"declaration of non-local variable in 'for' loop">;
def err_non_variable_decl_in_for : Error<
"non-variable declaration in 'for' loop">;
def err_toomany_element_decls : Error<
"only one element declaration is allowed">;
def err_selector_element_not_lvalue : Error<
"selector element is not a valid lvalue">;
def err_selector_element_type : Error<
"selector element type %0 is not a valid object">;
def err_selector_element_const_type : Error<
"selector element of type %0 cannot be a constant lvalue expression">;
def err_collection_expr_type : Error<
"the type %0 is not a pointer to a fast-enumerable object">;
def warn_collection_expr_type : Warning<
"collection expression type %0 may not respond to %1">;
def err_invalid_conversion_between_ext_vectors : Error<
"invalid conversion between ext-vector type %0 and %1">;
def warn_duplicate_attribute_exact : Warning<
"attribute %0 is already applied">, InGroup<IgnoredAttributes>;
def warn_duplicate_attribute : Warning<
"attribute %0 is already applied with different arguments">,
InGroup<IgnoredAttributes>;
def err_disallowed_duplicate_attribute : Error<
"attribute %0 cannot appear more than once on a declaration">;
def warn_sync_fetch_and_nand_semantics_change : Warning<
"the semantics of this intrinsic changed with GCC "
"version 4.4 - the newer semantics are provided here">,
InGroup<DiagGroup<"sync-fetch-and-nand-semantics-changed">>;
// Type
def ext_wchar_t_sign_spec : ExtWarn<"'%0' cannot be signed or unsigned">,
InGroup<DiagGroup<"signed-unsigned-wchar">>, DefaultError;
def warn_receiver_forward_class : Warning<
"receiver %0 is a forward class and corresponding @interface may not exist">,
InGroup<ForwardClassReceiver>;
def note_method_sent_forward_class : Note<"method %0 is used for the forward class">;
def ext_missing_type_specifier : ExtWarn<
"type specifier missing, defaults to 'int'; ISO C99 and later do not support "
"implicit int">, InGroup<ImplicitInt>;
def err_missing_type_specifier : Error<
"a type specifier is required for all declarations">;
def err_decimal_unsupported : Error<
"GNU decimal type extension not supported">;
def err_objc_array_of_interfaces : Error<
"array of interface %0 is invalid (probably should be an array of pointers)">;
def ext_c99_array_usage : Extension<
"%select{qualifier in |static |}0array size %select{||'[*] '}0is a C99 "
"feature">, InGroup<C99>;
def err_c99_array_usage_cxx : Error<
"%select{qualifier in |static |}0array size %select{||'[*] '}0is a C99 "
"feature, not permitted in C++">;
def err_type_unsupported : Error<
"%0 is not supported on this target">;
def err_nsconsumed_attribute_mismatch : Error<
"overriding method has mismatched ns_consumed attribute on its"
" parameter">;
def err_nsreturns_retained_attribute_mismatch : Error<
"overriding method has mismatched ns_returns_%select{not_retained|retained}0"
" attributes">;
def err_nserrordomain_invalid_decl : Error<
"domain argument %select{|%1 }0does not refer to global constant">;
def err_nserrordomain_wrong_type : Error<
"domain argument %0 does not point to an NSString or CFString constant">;
def warn_nsconsumed_attribute_mismatch : Warning<
err_nsconsumed_attribute_mismatch.Summary>, InGroup<NSConsumedMismatch>;
def warn_nsreturns_retained_attribute_mismatch : Warning<
err_nsreturns_retained_attribute_mismatch.Summary>, InGroup<NSReturnsMismatch>;
def note_getter_unavailable : Note<
"or because setter is declared here, but no getter method %0 is found">;
def err_invalid_protocol_qualifiers : Error<
"invalid protocol qualifiers on non-ObjC type">;
def warn_ivar_use_hidden : Warning<
"local declaration of %0 hides instance variable">,
InGroup<ShadowIvar>;
def warn_direct_initialize_call : Warning<
"explicit call to +initialize results in duplicate call to +initialize">,
InGroup<ExplicitInitializeCall>;
def warn_direct_super_initialize_call : Warning<
"explicit call to [super initialize] should only be in implementation "
"of +initialize">,
InGroup<ExplicitInitializeCall>;
def err_ivar_use_in_class_method : Error<
"instance variable %0 accessed in class method">;
def err_private_ivar_access : Error<"instance variable %0 is private">,
AccessControl;
def err_protected_ivar_access : Error<"instance variable %0 is protected">,
AccessControl;
def warn_maynot_respond : Warning<"%0 may not respond to %1">;
def ext_typecheck_base_super : Warning<
"method parameter type "
"%diff{$ does not match super class method parameter type $|"
"does not match super class method parameter type}0,1">,
InGroup<SuperSubClassMismatch>, DefaultIgnore;
def warn_missing_method_return_type : Warning<
"method has no return type specified; defaults to 'id'">,
InGroup<MissingMethodReturnType>, DefaultIgnore;
def warn_direct_ivar_access : Warning<"instance variable %0 is being "
"directly accessed">, InGroup<DiagGroup<"direct-ivar-access">>, DefaultIgnore;
// Spell-checking diagnostics
def err_unknown_typename : Error<
"unknown type name %0">;
def err_unknown_type_or_class_name_suggest : Error<
"unknown %select{type|class}1 name %0; did you mean %2?">;
def err_unknown_typename_suggest : Error<
"unknown type name %0; did you mean %1?">;
def err_unknown_nested_typename_suggest : Error<
"no type named %0 in %1; did you mean %select{|simply }2%3?">;
def err_no_member_suggest : Error<"no member named %0 in %1; did you mean %select{|simply }2%3?">;
def err_undeclared_use_suggest : Error<
"use of undeclared %0; did you mean %1?">;
def err_undeclared_var_use_suggest : Error<
"use of undeclared identifier %0; did you mean %1?">;
def err_no_template : Error<"no template named %0">;
def err_no_template_suggest : Error<"no template named %0; did you mean %1?">;
def err_no_member_template : Error<"no template named %0 in %1">;
def err_no_member_template_suggest : Error<
"no template named %0 in %1; did you mean %select{|simply }2%3?">;
def err_non_template_in_template_id : Error<
"%0 does not name a template but is followed by template arguments">;
def err_non_template_in_template_id_suggest : Error<
"%0 does not name a template but is followed by template arguments; "
"did you mean %1?">;
def err_non_template_in_member_template_id_suggest : Error<
"member %0 of %1 is not a template; did you mean %select{|simply }2%3?">;
def note_non_template_in_template_id_found : Note<
"non-template declaration found by name lookup">;
def err_mem_init_not_member_or_class_suggest : Error<
"initializer %0 does not name a non-static data member or base "
"class; did you mean the %select{base class|member}1 %2?">;
def err_field_designator_unknown_suggest : Error<
"field designator %0 does not refer to any field in type %1; did you mean "
"%2?">;
def err_typecheck_member_reference_ivar_suggest : Error<
"%0 does not have a member named %1; did you mean %2?">;
def err_property_not_found_suggest : Error<
"property %0 not found on object of type %1; did you mean %2?">;
def err_class_property_found : Error<
"property %0 is a class property; did you mean to access it with class '%1'?">;
def err_ivar_access_using_property_syntax_suggest : Error<
"property %0 not found on object of type %1; did you mean to access instance variable %2?">;
def warn_property_access_suggest : Warning<
"property %0 not found on object of type %1; did you mean to access property %2?">,
InGroup<PropertyAccessDotSyntax>;
def err_property_found_suggest : Error<
"property %0 found on object of type %1; did you mean to access "
"it with the \".\" operator?">;
def err_undef_interface_suggest : Error<
"cannot find interface declaration for %0; did you mean %1?">;
def warn_undef_interface_suggest : Warning<
"cannot find interface declaration for %0; did you mean %1?">;
def err_undef_superclass_suggest : Error<
"cannot find interface declaration for %0, superclass of %1; did you mean "
"%2?">;
def err_undeclared_protocol_suggest : Error<
"cannot find protocol declaration for %0; did you mean %1?">;
def note_base_class_specified_here : Note<
"base class %0 specified here">;
def err_using_directive_suggest : Error<
"no namespace named %0; did you mean %1?">;
def err_using_directive_member_suggest : Error<
"no namespace named %0 in %1; did you mean %select{|simply }2%3?">;
def note_namespace_defined_here : Note<"namespace %0 defined here">;
def err_sizeof_pack_no_pack_name_suggest : Error<
"%0 does not refer to the name of a parameter pack; did you mean %1?">;
def note_parameter_pack_here : Note<"parameter pack %0 declared here">;
def err_uncasted_use_of_unknown_any : Error<
"%0 has unknown type; cast it to its declared type to use it">;
def err_uncasted_call_of_unknown_any : Error<
"%0 has unknown return type; cast the call to its declared return type">;
def err_uncasted_send_to_unknown_any_method : Error<
"no known method %select{%objcinstance1|%objcclass1}0; cast the "
"message send to the method's return type">;
def err_unsupported_unknown_any_decl : Error<
"%0 has unknown type, which is not supported for this kind of declaration">;
def err_unsupported_unknown_any_expr : Error<
"unsupported expression with unknown type">;
def err_unsupported_unknown_any_call : Error<
"call to unsupported expression with unknown type">;
def err_unknown_any_addrof : Error<
"the address of a declaration with unknown type "
"can only be cast to a pointer type">;
def err_unknown_any_addrof_call : Error<
"address-of operator cannot be applied to a call to a function with "
"unknown return type">;
def err_unknown_any_var_function_type : Error<
"variable %0 with unknown type cannot be given a function type">;
def err_unknown_any_function : Error<
"function %0 with unknown type must be given a function type">;
def err_filter_expression_integral : Error<
"filter expression has non-integral type %0">;
def err_non_asm_stmt_in_naked_function : Error<
"non-ASM statement in naked function is not supported">;
def err_asm_naked_this_ref : Error<
"'this' pointer references not allowed in naked functions">;
def err_asm_naked_parm_ref : Error<
"parameter references not allowed in naked functions">;
// OpenCL warnings and errors.
def err_invalid_astype_of_different_size : Error<
"invalid reinterpretation: sizes of %0 and %1 must match">;
def err_static_kernel : Error<
"kernel functions cannot be declared static">;
def err_method_kernel : Error<
"kernel functions cannot be class members">;
def err_template_kernel : Error<
"kernel functions cannot be used in a template declaration, instantiation or specialization">;
def err_opencl_ptrptr_kernel_param : Error<
"kernel parameter cannot be declared as a pointer to a pointer">;
def err_kernel_arg_address_space : Error<
"pointer arguments to kernel functions must reside in '__global', "
"'__constant' or '__local' address space">;
def err_opencl_ext_vector_component_invalid_length : Error<
"vector component access has invalid length %0. Supported: 1,2,3,4,8,16.">;
def err_opencl_function_variable : Error<
"%select{non-kernel function|function scope}0 variable cannot be declared in %1 address space">;
def err_opencl_addrspace_scope : Error<
"variables in the %0 address space can only be declared in the outermost "
"scope of a kernel function">;
def err_static_function_scope : Error<
"variables in function scope cannot be declared static">;
def err_opencl_bitfields : Error<
"bit-fields are not supported in OpenCL">;
def err_opencl_vla : Error<
"variable length arrays are not supported in OpenCL">;
def err_opencl_scalar_type_rank_greater_than_vector_type : Error<
"scalar operand type has greater rank than the type of the vector "
"element. (%0 and %1)">;
def err_bad_kernel_param_type : Error<
"%0 cannot be used as the type of a kernel parameter">;
def err_record_with_pointers_kernel_param : Error<
"%select{struct|union}0 kernel parameters may not contain pointers">;
def note_within_field_of_type : Note<
"within field of type %0 declared here">;
def note_illegal_field_declared_here : Note<
"field of illegal %select{type|pointer type}0 %1 declared here">;
def err_opencl_type_struct_or_union_field : Error<
"the %0 type cannot be used to declare a structure or union field">;
def err_event_t_addr_space_qual : Error<
"the event_t type can only be used with __private address space qualifier">;
def err_expected_kernel_void_return_type : Error<
"kernel must have void return type">;
def err_sampler_initializer_not_integer : Error<
"sampler_t initialization requires 32-bit integer, not %0">;
def warn_sampler_initializer_invalid_bits : Warning<
"sampler initializer has invalid %0 bits">, InGroup<SpirCompat>, DefaultIgnore;
def err_sampler_argument_required : Error<
"sampler_t variable required - got %0">;
def err_wrong_sampler_addressspace: Error<
"sampler type cannot be used with the __local and __global address space qualifiers">;
def err_opencl_nonconst_global_sampler : Error<
"global sampler requires a const or constant address space qualifier">;
def err_opencl_cast_non_zero_to_event_t : Error<
"cannot cast non-zero value '%0' to 'event_t'">;
def err_opencl_global_invalid_addr_space : Error<
"%select{program scope|static local|extern}0 variable must reside in %1 address space">;
def err_missing_actual_pipe_type : Error<
"missing actual type specifier for pipe">;
def err_reference_pipe_type : Error <
"pipes packet types cannot be of reference type">;
def err_opencl_no_main : Error<"%select{function|kernel}0 cannot be called 'main'">;
def err_opencl_kernel_attr :
Error<"attribute %0 can only be applied to an OpenCL kernel function">;
def err_return_value_with_address_space : Error<
"return type cannot be qualified with address space">;
def err_opencl_constant_no_init : Error<
"variable in constant address space must be initialized">;
def err_opencl_atomic_init: Error<
"atomic variable can be %select{assigned|initialized}0 to a variable only "
"in global address space">;
def err_opencl_implicit_vector_conversion : Error<
"implicit conversions between vector types (%0 and %1) are not permitted">;
def err_opencl_invalid_type_array : Error<
"array of %0 type is invalid in OpenCL">;
def err_opencl_ternary_with_block : Error<
"block type cannot be used as expression in ternary expression in OpenCL">;
def err_opencl_pointer_to_type : Error<
"pointer to type %0 is invalid in OpenCL">;
def err_opencl_type_can_only_be_used_as_function_parameter : Error <
"type %0 can only be used as a function parameter in OpenCL">;
def err_opencl_type_not_found : Error<
"%0 type %1 not found; include the base header with -finclude-default-header">;
def warn_opencl_attr_deprecated_ignored : Warning <
"%0 attribute is deprecated and ignored in %1">, InGroup<IgnoredAttributes>;
def err_opencl_variadic_function : Error<
"invalid prototype, variadic arguments are not allowed in OpenCL">;
def err_opencl_requires_extension : Error<
"use of %select{type|declaration}0 %1 requires %2 support">;
def ext_opencl_double_without_pragma : Extension<
"Clang permits use of type 'double' regardless pragma if 'cl_khr_fp64' is"
" supported">;
def warn_opencl_generic_address_space_arg : Warning<
"passing non-generic address space pointer to %0"
" may cause dynamic conversion affecting performance">,
InGroup<Conversion>, DefaultIgnore;
// OpenCL v2.0 s6.13.6 -- Builtin Pipe Functions
def err_opencl_builtin_pipe_first_arg : Error<
"first argument to %0 must be a pipe type">;
def err_opencl_builtin_pipe_arg_num : Error<
"invalid number of arguments to function: %0">;
def err_opencl_builtin_pipe_invalid_arg : Error<
"invalid argument type to function %0 (expecting %1 having %2)">;
def err_opencl_builtin_pipe_invalid_access_modifier : Error<
"invalid pipe access modifier (expecting %0)">;
// OpenCL access qualifier
def err_opencl_invalid_access_qualifier : Error<
"access qualifier can only be used for pipe and image type">;
def err_opencl_invalid_read_write : Error<
"access qualifier %0 can not be used for %1 %select{|prior to OpenCL C version 2.0 or in version 3.0 "
"and without __opencl_c_read_write_images feature}2">;
def err_opencl_multiple_access_qualifiers : Error<
"multiple access qualifiers">;
def note_opencl_typedef_access_qualifier : Note<
"previously declared '%0' here">;
// OpenCL v2.0 s6.12.5 Blocks restrictions
def err_opencl_block_storage_type : Error<
"the __block storage type is not permitted">;
def err_opencl_invalid_block_declaration : Error<
"invalid block variable declaration - must be %select{const qualified|initialized}0">;
def err_opencl_extern_block_declaration : Error<
"invalid block variable declaration - using 'extern' storage class is disallowed">;
def err_opencl_block_ref_block : Error<
"cannot refer to a block inside block">;
// OpenCL v2.0 s6.13.9 - Address space qualifier functions.
def err_opencl_builtin_to_addr_invalid_arg : Error<
"invalid argument %0 to function: %1, expecting a generic pointer argument">;
// OpenCL v2.0 s6.13.17 Enqueue kernel restrictions.
def err_opencl_enqueue_kernel_incorrect_args : Error<
"illegal call to enqueue_kernel, incorrect argument types">;
def err_opencl_enqueue_kernel_local_size_args : Error<
"mismatch in number of block parameters and local size arguments passed">;
def err_opencl_enqueue_kernel_invalid_local_size_type : Error<
"illegal call to enqueue_kernel, parameter needs to be specified as integer type">;
def err_opencl_enqueue_kernel_blocks_non_local_void_args : Error<
"blocks used in enqueue_kernel call are expected to have parameters of type 'local void*'">;
def err_opencl_enqueue_kernel_blocks_no_args : Error<
"blocks with parameters are not accepted in this prototype of enqueue_kernel call">;
def err_opencl_builtin_expected_type : Error<
"illegal call to %0, expected %1 argument type">;
// OpenCL v3.0 s6.3.7 - Vector Components
def ext_opencl_ext_vector_type_rgba_selector: ExtWarn<
"vector component name '%0' is a feature from OpenCL version 3.0 onwards">,
InGroup<OpenCLUnsupportedRGBA>;
def err_openclcxx_placement_new : Error<
"use of placement new requires explicit declaration">;
// MIG routine annotations.
def warn_mig_server_routine_does_not_return_kern_return_t : Warning<
"'mig_server_routine' attribute only applies to routines that return a kern_return_t">,
InGroup<IgnoredAttributes>;
def warn_imp_cast_drops_unaligned : Warning<
"implicit cast from type %0 to type %1 drops __unaligned qualifier">,
InGroup<DiagGroup<"unaligned-qualifier-implicit-cast">>;
} // end of sema category
let CategoryName = "OpenMP Issue" in {
// OpenMP support.
def err_omp_expected_var_arg : Error<
"%0 is not a global variable, static local variable or static data member">;
def err_omp_expected_var_arg_suggest : Error<
"%0 is not a global variable, static local variable or static data member; "
"did you mean %1">;
def err_omp_global_var_arg : Error<
"arguments of '#pragma omp %0' must have %select{global storage|static storage duration}1">;
def err_omp_ref_type_arg : Error<
"arguments of '#pragma omp %0' cannot be of reference type %1">;
def err_omp_region_not_file_context : Error<
"directive must be at file or namespace scope">;
def err_omp_var_scope : Error<
"'#pragma omp %0' must appear in the scope of the %q1 variable declaration">;
def err_omp_var_used : Error<
"'#pragma omp %0' must precede all references to variable %q1">;
def err_omp_var_thread_local : Error<
"variable %0 cannot be threadprivate because it is %select{thread-local|a global named register variable}1">;
def err_omp_private_incomplete_type : Error<
"a private variable with incomplete type %0">;
def err_omp_firstprivate_incomplete_type : Error<
"a firstprivate variable with incomplete type %0">;
def err_omp_lastprivate_incomplete_type : Error<
"a lastprivate variable with incomplete type %0">;
def err_omp_reduction_incomplete_type : Error<
"a reduction list item with incomplete type %0">;
def err_omp_unexpected_clause_value : Error<
"expected %0 in OpenMP clause '%1'">;
def err_omp_unexpected_call_to_omp_runtime_api
: Error<"calls to OpenMP runtime API are not allowed within a region that "
"corresponds to a construct with an order clause that specifies "
"concurrent">;
def err_omp_expected_var_name_member_expr : Error<
"expected variable name%select{| or data member of current class}0">;
def err_omp_expected_var_name_member_expr_with_type : Error<
"expected variable%select{| or static data member|, static data member, "
"or non-static data member of current class}0 of type '%1'">;
def err_omp_expected_var_name_member_expr_or_array_item : Error<
"expected variable name%select{|, data member of current class}0, array element or array section">;
def err_omp_expected_addressable_lvalue_or_array_item : Error<
"expected addressable lvalue expression, array element%select{ or array section|, array section or array shaping expression}0%select{| of non 'omp_depend_t' type}1">;
def err_omp_expected_named_var_member_or_array_expression: Error<
"expected expression containing only member accesses and/or array sections based on named variables">;
def err_omp_bit_fields_forbidden_in_clause : Error<
"bit fields cannot be used to specify storage in a '%0' clause">;
def err_array_section_does_not_specify_contiguous_storage : Error<
"array section does not specify contiguous storage">;
def err_array_section_does_not_specify_length : Error<
"array section does not specify length for outermost dimension">;
def err_omp_union_type_not_allowed : Error<
"mapping of union members is not allowed">;
def err_omp_expected_access_to_data_field : Error<
"expected access to data field">;
def err_omp_multiple_array_items_in_map_clause : Error<
"multiple array elements associated with the same variable are not allowed in map clauses of the same construct">;
def err_omp_duplicate_map_type_modifier : Error<
"same map type modifier has been specified more than once">;
def err_omp_duplicate_motion_modifier : Error<
"same motion modifier has been specified more than once">;
def err_omp_pointer_mapped_along_with_derived_section : Error<
"pointer cannot be mapped along with a section derived from itself">;
def err_omp_original_storage_is_shared_and_does_not_contain : Error<
"original storage of expression in data environment is shared but data environment do not fully contain mapped expression storage">;
def err_omp_same_pointer_dereferenced : Error<
"same pointer dereferenced in multiple different ways in map clause expressions">;
def note_omp_task_predetermined_firstprivate_here : Note<
"predetermined as a firstprivate in a task construct here">;
def err_omp_threadprivate_incomplete_type : Error<
"threadprivate variable with incomplete type %0">;
def err_omp_no_dsa_for_variable : Error<
"variable %0 must have explicitly specified data sharing attributes">;
def err_omp_defaultmap_no_attr_for_variable : Error<
"variable %0 must have explicitly specified data sharing attributes, data mapping attributes, or in an is_device_ptr clause">;
def note_omp_default_dsa_none : Note<
"explicit data sharing attribute requested here">;
def note_omp_defaultmap_attr_none : Note<
"explicit data sharing attribute, data mapping attribute, or is_device_ptr clause requested here">;
def err_omp_wrong_dsa : Error<
"%0 variable cannot be %1">;
def err_omp_variably_modified_type_not_supported : Error<
"arguments of OpenMP clause '%0' in '#pragma omp %2' directive cannot be of variably-modified type %1">;
def note_omp_explicit_dsa : Note<
"defined as %0">;
def note_omp_predetermined_dsa : Note<
"%select{static data member is predetermined as shared|"
"variable with static storage duration is predetermined as shared|"
"loop iteration variable is predetermined as private|"
"loop iteration variable is predetermined as linear|"
"loop iteration variable is predetermined as lastprivate|"
"constant variable is predetermined as shared|"
"global variable is predetermined as shared|"
"non-shared variable in a task construct is predetermined as firstprivate|"
"variable with automatic storage duration is predetermined as private}0"
"%select{|; perhaps you forget to enclose 'omp %2' directive into a parallel or another task region?}1">;
def note_omp_implicit_dsa : Note<
"implicitly determined as %0">;
def err_omp_loop_var_dsa : Error<
"loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
def err_omp_not_for : Error<
"%select{statement after '#pragma omp %1' must be a for loop|"
"expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
def note_omp_collapse_ordered_expr : Note<
"as specified in %select{'collapse'|'ordered'|'collapse' and 'ordered'}0 clause%select{||s}0">;
def err_omp_negative_expression_in_clause : Error<
"argument to '%0' clause must be a %select{non-negative|strictly positive}1 integer value">;
def err_omp_not_integral : Error<
"expression must have integral or unscoped enumeration "
"type, not %0">;
def err_omp_threadprivate_in_target : Error<
"threadprivate variables cannot be used in target constructs">;
def err_omp_incomplete_type : Error<
"expression has incomplete class type %0">;
def err_omp_explicit_conversion : Error<
"expression requires explicit conversion from %0 to %1">;
def note_omp_conversion_here : Note<
"conversion to %select{integral|enumeration}0 type %1 declared here">;
def err_omp_ambiguous_conversion : Error<
"ambiguous conversion from type %0 to an integral or unscoped "
"enumeration type">;
def err_omp_iterator_not_integral_or_pointer : Error<
"expected integral or pointer type as the iterator-type, not %0">;
def err_omp_iterator_step_not_integral : Error<
"iterator step expression %0 is not the integral expression">;
def err_omp_iterator_step_constant_zero : Error<
"iterator step expression %0 evaluates to 0">;
def err_omp_required_access : Error<
"%0 variable must be %1">;
def err_omp_const_variable : Error<
"const-qualified variable cannot be %0">;
def err_omp_const_not_mutable_variable : Error<
"const-qualified variable without mutable fields cannot be %0">;
def err_omp_const_list_item : Error<
"const-qualified list item cannot be %0">;
def err_omp_linear_incomplete_type : Error<
"a linear variable with incomplete type %0">;
def err_omp_linear_expected_int_or_ptr : Error<
"argument of a linear clause should be of integral or pointer "
"type, not %0">;
def warn_omp_linear_step_zero : Warning<
"zero linear step (%0 %select{|and other variables in clause }1should probably be const)">,
InGroup<OpenMPClauses>;
def warn_omp_alignment_not_power_of_two : Warning<
"aligned clause will be ignored because the requested alignment is not a power of 2">,
InGroup<OpenMPClauses>;
def err_omp_invalid_target_decl : Error<
"%0 used in declare target directive is not a variable or a function name">;
def err_omp_declare_target_to_and_link : Error<
"%0 must not appear in both clauses 'to' and 'link'">;
def warn_omp_not_in_target_context : Warning<
"declaration is not declared in any declare target region">,
InGroup<OpenMPTarget>;
def err_omp_function_in_link_clause : Error<
"function name is not allowed in 'link' clause">;
def err_omp_aligned_expected_array_or_ptr : Error<
"argument of aligned clause should be array"
"%select{ or pointer|, pointer, reference to array or reference to pointer}1"
", not %0">;
def err_omp_used_in_clause_twice : Error<
"%select{a variable|a parameter|'this'}0 cannot appear in more than one %1 clause">;
def err_omp_local_var_in_threadprivate_init : Error<
"variable with local storage in initial value of threadprivate variable">;
def err_omp_loop_not_canonical_init : Error<
"initialization clause of OpenMP for loop is not in canonical form "
"('var = init' or 'T var = init')">;
def ext_omp_loop_not_canonical_init : ExtWarn<
"initialization clause of OpenMP for loop is not in canonical form "
"('var = init' or 'T var = init')">, InGroup<OpenMPLoopForm>;
def err_omp_loop_not_canonical_cond : Error<
"condition of OpenMP for loop must be a relational comparison "
"('<', '<=', '>', %select{or '>='|'>=', or '!='}0) of loop variable %1">;
def err_omp_loop_not_canonical_incr : Error<
"increment clause of OpenMP for loop must perform simple addition "
"or subtraction on loop variable %0">;
def err_omp_loop_variable_type : Error<
"variable must be of integer or %select{pointer|random access iterator}0 type">;
def err_omp_loop_incr_not_compatible : Error<
"increment expression must cause %0 to %select{decrease|increase}1 "
"on each iteration of OpenMP for loop">;
def note_omp_loop_cond_requres_compatible_incr : Note<
"loop step is expected to be %select{negative|positive}0 due to this condition">;
def err_omp_loop_diff_cxx : Error<
"could not calculate number of iterations calling 'operator-' with "
"upper and lower loop bounds">;
def err_omp_loop_cannot_use_stmt : Error<
"'%0' statement cannot be used in OpenMP for loop">;
def err_omp_simd_region_cannot_use_stmt : Error<
"'%0' statement cannot be used in OpenMP simd region">;
def warn_omp_loop_64_bit_var : Warning<
"OpenMP loop iteration variable cannot have more than 64 bits size and will be narrowed">,
InGroup<OpenMPLoopForm>;
def err_omp_unknown_reduction_identifier : Error<
"incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', "
"'&&', '||', 'min' or 'max' or declare reduction for type %0">;
def err_omp_not_resolved_reduction_identifier : Error<
"unable to resolve declare reduction construct for type %0">;
def err_omp_reduction_ref_type_arg : Error<
"argument of OpenMP clause '%0' must reference the same object in all threads">;
def err_omp_clause_not_arithmetic_type_arg : Error<
"arguments of OpenMP clause '%0' for 'min' or 'max' must be of %select{scalar|arithmetic}1 type">;
def err_omp_clause_floating_type_arg : Error<
"arguments of OpenMP clause '%0' with bitwise operators cannot be of floating type">;
def err_omp_once_referenced : Error<
"variable can appear only once in OpenMP '%0' clause">;
def err_omp_once_referenced_in_target_update : Error<
"variable can appear only once in OpenMP 'target update' construct">;
def note_omp_referenced : Note<
"previously referenced here">;
def err_omp_reduction_in_task : Error<
"reduction variables may not be accessed in an explicit task">;
def err_omp_reduction_id_not_compatible : Error<
"list item of type %0 is not valid for specified reduction operation: unable to provide default initialization value">;
def err_omp_reduction_identifier_mismatch : Error<
"in_reduction variable must have the same reduction operation as in a task_reduction clause">;
def note_omp_previous_reduction_identifier : Note<
"previously marked as task_reduction with different reduction operation">;
def err_omp_prohibited_region : Error<
"region cannot be%select{| closely}0 nested inside '%1' region"
"%select{|; perhaps you forget to enclose 'omp %3' directive into a parallel region?|"
"; perhaps you forget to enclose 'omp %3' directive into a for or a parallel for region with 'ordered' clause?|"
"; perhaps you forget to enclose 'omp %3' directive into a target region?|"
"; perhaps you forget to enclose 'omp %3' directive into a teams region?|"
"; perhaps you forget to enclose 'omp %3' directive into a for, simd, for simd, parallel for, or parallel for simd region?}2">;
def err_omp_prohibited_region_simd : Error<
"OpenMP constructs may not be nested inside a simd region%select{| except for ordered simd, simd, scan, or atomic directive}0">;
def err_omp_prohibited_region_atomic : Error<
"OpenMP constructs may not be nested inside an atomic region">;
def err_omp_prohibited_region_order
: Error<"construct '%0' not allowed in a region associated with a "
"directive with 'order' clause">;
def err_omp_prohibited_region_critical_same_name : Error<
"cannot nest 'critical' regions having the same name %0">;
def note_omp_previous_critical_region : Note<
"previous 'critical' region starts here">;
def err_omp_several_directives_in_region : Error<
"exactly one '%0' directive must appear in the loop body of an enclosing directive">;
def note_omp_previous_directive : Note<
"previous '%0' directive used here">;
def err_omp_sections_not_compound_stmt : Error<
"the statement for '#pragma omp sections' must be a compound statement">;
def err_omp_parallel_sections_not_compound_stmt : Error<
"the statement for '#pragma omp parallel sections' must be a compound statement">;
def err_omp_orphaned_section_directive : Error<
"%select{orphaned 'omp section' directives are prohibited, it|'omp section' directive}0"
" must be closely nested to a sections region%select{|, not a %1 region}0">;
def err_omp_sections_substmt_not_section : Error<
"statement in 'omp sections' directive must be enclosed into a section region">;
def err_omp_parallel_sections_substmt_not_section : Error<
"statement in 'omp parallel sections' directive must be enclosed into a section region">;
def err_omp_parallel_reduction_in_task_firstprivate : Error<
"argument of a reduction clause of a %0 construct must not appear in a firstprivate clause on a task construct">;
def err_omp_atomic_read_not_expression_statement : Error<
"the statement for 'atomic read' must be an expression statement of form 'v = x;',"
" where v and x are both lvalue expressions with scalar type">;
def note_omp_atomic_read_write: Note<
"%select{expected an expression statement|expected built-in assignment operator|expected expression of scalar type|expected lvalue expression}0">;
def err_omp_atomic_write_not_expression_statement : Error<
"the statement for 'atomic write' must be an expression statement of form 'x = expr;',"
" where x is a lvalue expression with scalar type">;
def err_omp_atomic_update_not_expression_statement : Error<
"the statement for 'atomic update' must be an expression statement of form '++x;', '--x;', 'x++;', 'x--;', 'x binop= expr;', 'x = x binop expr' or 'x = expr binop x',"
" where x is an lvalue expression with scalar type">;
def err_omp_atomic_not_expression_statement : Error<
"the statement for 'atomic' must be an expression statement of form '++x;', '--x;', 'x++;', 'x--;', 'x binop= expr;', 'x = x binop expr' or 'x = expr binop x',"
" where x is an lvalue expression with scalar type">;
def note_omp_atomic_update: Note<
"%select{expected an expression statement|expected built-in binary or unary operator|expected unary decrement/increment operation|"
"expected expression of scalar type|expected assignment expression|expected built-in binary operator|"
"expected one of '+', '*', '-', '/', '&', '^', '%|', '<<', or '>>' built-in operations|expected in right hand side of expression}0">;
def err_omp_atomic_capture_not_expression_statement : Error<
"the statement for 'atomic capture' must be an expression statement of form 'v = ++x;', 'v = --x;', 'v = x++;', 'v = x--;', 'v = x binop= expr;', 'v = x = x binop expr' or 'v = x = expr binop x',"
" where x and v are both lvalue expressions with scalar type">;
def err_omp_atomic_capture_not_compound_statement : Error<
"the statement for 'atomic capture' must be a compound statement of form '{v = x; x binop= expr;}', '{x binop= expr; v = x;}',"
" '{v = x; x = x binop expr;}', '{v = x; x = expr binop x;}', '{x = x binop expr; v = x;}', '{x = expr binop x; v = x;}' or '{v = x; x = expr;}',"
" '{v = x; x++;}', '{v = x; ++x;}', '{++x; v = x;}', '{x++; v = x;}', '{v = x; x--;}', '{v = x; --x;}', '{--x; v = x;}', '{x--; v = x;}'"
" where x is an lvalue expression with scalar type">;
def note_omp_atomic_capture: Note<
"%select{expected assignment expression|expected compound statement|expected exactly two expression statements|expected in right hand side of the first expression}0">;
def err_omp_atomic_compare : Error<
"the statement for 'atomic compare' must be a compound statement of form '{x = expr ordop x ? expr : x;}', '{x = x ordop expr? expr : x;}',"
" '{x = x == e ? d : x;}', '{x = e == x ? d : x;}', or 'if(expr ordop x) {x = expr;}', 'if(x ordop expr) {x = expr;}', 'if(x == e) {x = d;}',"
" 'if(e == x) {x = d;}' where 'x' is an lvalue expression with scalar type, 'expr', 'e', and 'd' are expressions with scalar type,"
" and 'ordop' is one of '<' or '>'.">;
def err_omp_atomic_compare_capture : Error<
"the statement for 'atomic compare capture' must be a compound statement of form '{v = x; cond-up-stmt}', ''{cond-up-stmt v = x;}', '{if(x == e) {x = d;} else {v = x;}}',"
" '{r = x == e; if(r) {x = d;}}', or '{r = x == e; if(r) {x = d;} else {v = x;}}', where 'cond-update-stmt' can have one of the following forms: 'if(expr ordop x) {x = expr;}',"
" 'if(x ordop expr) {x = expr;}', 'if(x == e) {x = d;}', or 'if(e == x) {x = d;}' where 'x', 'r', and 'v' are lvalue expressions with scalar type, 'expr', 'e', and 'd' are expressions with scalar type,"
" and 'ordop' is one of '<' or '>'.">;
def note_omp_atomic_compare: Note<
"%select{expected compound statement|expected exactly one expression statement|expected assignment statement|expected conditional operator|expect result value to be at false expression|"
"expect binary operator in conditional expression|expect '<', '>' or '==' as order operator|expect comparison in a form of 'x == e', 'e == x', 'x ordop expr', or 'expr ordop x'|"
"expect lvalue for result value|expect scalar value|expect integer value|unexpected 'else' statement|expect '==' operator|expect an assignment statement 'v = x'|"
"expect a 'if' statement|expect no more than two statements|expect a compound statement|expect 'else' statement|expect a form 'r = x == e; if (r) ...'}0">;
def err_omp_atomic_several_clauses : Error<
"directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause">;
def err_omp_several_mem_order_clauses : Error<
"directive '#pragma omp %0' cannot contain more than one %select{'seq_cst', 'relaxed', |}1'acq_rel', 'acquire' or 'release' clause">;
def err_omp_atomic_incompatible_mem_order_clause : Error<
"directive '#pragma omp atomic%select{ %0|}1' cannot be used with '%2' clause">;
def note_omp_previous_mem_order_clause : Note<
"'%0' clause used here">;
def err_omp_target_contains_not_only_teams : Error<
"target construct with nested teams region contains statements outside of the teams construct">;
def note_omp_nested_teams_construct_here : Note<
"nested teams construct here">;
def note_omp_nested_statement_here : Note<
"%select{statement|directive}0 outside teams construct here">;
def err_omp_single_copyprivate_with_nowait : Error<
"the 'copyprivate' clause must not be used with the 'nowait' clause">;
def err_omp_nowait_clause_without_depend: Error<
"directive '#pragma omp taskwait' cannot use 'nowait' clause without 'depend' clause">;
def note_omp_nowait_clause_here : Note<
"'nowait' clause is here">;
def err_omp_single_decl_in_declare_simd_variant : Error<
"single declaration is expected after 'declare %select{simd|variant}0' directive">;
def err_omp_function_expected : Error<
"'#pragma omp declare %select{simd|variant}0' can only be applied to functions">;
def err_omp_wrong_cancel_region : Error<
"one of 'for', 'parallel', 'sections' or 'taskgroup' is expected">;
def err_omp_parent_cancel_region_nowait : Error<
"parent region for 'omp %select{cancellation point|cancel}0' construct cannot be nowait">;
def err_omp_parent_cancel_region_ordered : Error<
"parent region for 'omp %select{cancellation point|cancel}0' construct cannot be ordered">;
def err_omp_reduction_wrong_type : Error<"reduction type cannot be %select{qualified with 'const', 'volatile' or 'restrict'|a function|a reference|an array}0 type">;
def err_omp_wrong_var_in_declare_reduction : Error<"only %select{'omp_priv' or 'omp_orig'|'omp_in' or 'omp_out'}0 variables are allowed in %select{initializer|combiner}0 expression">;
def err_omp_declare_reduction_redefinition : Error<"redefinition of user-defined reduction for type %0">;
def err_omp_mapper_wrong_type : Error<
"mapper type must be of struct, union or class type">;
def err_omp_declare_mapper_wrong_var : Error<
"only variable %0 is allowed in map clauses of this 'omp declare mapper' directive">;
def err_omp_declare_mapper_redefinition : Error<
"redefinition of user-defined mapper for type %0 with name %1">;
def err_omp_invalid_mapper: Error<
"cannot find a valid user-defined mapper for type %0 with name %1">;
def err_omp_array_section_use : Error<"OpenMP array section is not allowed here">;
def err_omp_array_shaping_use : Error<"OpenMP array shaping operation is not allowed here">;
def err_omp_iterator_use : Error<"OpenMP iterator is not allowed here">;
def err_omp_typecheck_section_value : Error<
"subscripted value is not an array or pointer">;
def err_omp_typecheck_section_not_integer : Error<
"array section %select{lower bound|length}0 is not an integer">;
def err_omp_typecheck_shaping_not_integer : Error<
"array shaping operation dimension is not an integer">;
def err_omp_shaping_dimension_not_positive : Error<
"array shaping dimension is evaluated to a non-positive value %0">;
def err_omp_section_function_type : Error<
"section of pointer to function type %0">;
def warn_omp_section_is_char : Warning<"array section %select{lower bound|length}0 is of type 'char'">,
InGroup<CharSubscript>, DefaultIgnore;
def err_omp_section_incomplete_type : Error<
"section of pointer to incomplete type %0">;
def err_omp_section_not_subset_of_array : Error<
"array section must be a subset of the original array">;
def err_omp_section_length_negative : Error<
"section length is evaluated to a negative value %0">;
def err_omp_section_stride_non_positive : Error<
"section stride is evaluated to a non-positive value %0">;
def err_omp_section_length_undefined : Error<
"section length is unspecified and cannot be inferred because subscripted value is %select{not an array|an array of unknown bound}0">;
def err_omp_wrong_linear_modifier : Error<
"expected %select{'val' modifier|one of 'ref', val' or 'uval' modifiers}0">;
def err_omp_wrong_linear_modifier_non_reference : Error<
"variable of non-reference type %0 can be used only with 'val' modifier, but used with '%1'">;
def err_omp_wrong_simdlen_safelen_values : Error<
"the value of 'simdlen' parameter must be less than or equal to the value of the 'safelen' parameter">;
def err_omp_wrong_if_directive_name_modifier : Error<
"directive name modifier '%0' is not allowed for '#pragma omp %1'">;
def err_omp_no_more_if_clause : Error<
"no more 'if' clause is allowed">;
def err_omp_unnamed_if_clause : Error<
"expected%select{| one of}0 %1 directive name modifier%select{|s}0">;
def note_omp_previous_named_if_clause : Note<
"previous clause with directive name modifier specified here">;
def err_omp_ordered_directive_with_param : Error<
"'ordered' directive %select{without any clauses|with 'threads' clause}0 cannot be closely nested inside ordered region with specified parameter">;
def err_omp_ordered_directive_without_param : Error<
"'ordered' directive with 'depend' clause cannot be closely nested inside ordered region without specified parameter">;
def note_omp_ordered_param : Note<
"'ordered' clause%select{| with specified parameter}0">;
def err_omp_expected_base_var_name : Error<
"expected variable name as a base of the array %select{subscript|section}0">;
def err_omp_map_shared_storage : Error<
"variable already marked as mapped in current construct">;
def err_omp_invalid_map_type_for_directive : Error<
"%select{map type '%1' is not allowed|map type must be specified}0 for '#pragma omp %2'">;
def err_omp_invalid_map_type_modifier_for_directive : Error<
"map type modifier '%0' is not allowed for '#pragma omp %1'">;
def err_omp_no_clause_for_directive : Error<
"expected at least one %0 clause for '#pragma omp %1'">;
def err_omp_threadprivate_in_clause : Error<
"threadprivate variables are not allowed in '%0' clause">;
def err_omp_wrong_ordered_loop_count : Error<
"the parameter of the 'ordered' clause must be greater than or equal to the parameter of the 'collapse' clause">;
def note_collapse_loop_count : Note<
"parameter of the 'collapse' clause">;
def err_omp_clauses_mutually_exclusive : Error<
"'%0' and '%1' clause are mutually exclusive and may not appear on the same directive">;
def note_omp_previous_clause : Note<
"'%0' clause is specified here">;
def err_omp_hint_clause_no_name : Error<
"the name of the construct must be specified in presence of 'hint' clause">;
def err_omp_critical_with_hint : Error<
"constructs with the same name must have a 'hint' clause with the same value">;
def note_omp_critical_hint_here : Note<
"%select{|previous }0'hint' clause with value '%1'">;
def note_omp_critical_no_hint : Note<
"%select{|previous }0directive with no 'hint' clause specified">;
def err_omp_depend_clause_thread_simd : Error<
"'depend' clauses cannot be mixed with '%0' clause">;
def err_omp_depend_sink_expected_loop_iteration : Error<
"expected%select{| %1}0 loop iteration variable">;
def err_omp_depend_sink_unexpected_expr : Error<
"unexpected expression: number of expressions is larger than the number of associated loops">;
def err_omp_depend_sink_expected_plus_minus : Error<
"expected '+' or '-' operation">;
def err_omp_taskwait_depend_mutexinoutset_not_allowed : Error<
"'mutexinoutset' modifier not allowed in 'depend' clause on 'taskwait' directive">;
def err_omp_depend_sink_source_not_allowed : Error<
"'depend(%select{source|sink:vec}0)' clause%select{|s}0 cannot be mixed with 'depend(%select{sink:vec|source}0)' clause%select{s|}0">;
def err_omp_depend_zero_length_array_section_not_allowed : Error<
"zero-length array section is not allowed in 'depend' clause">;
def err_omp_depend_sink_source_with_modifier : Error<
"depend modifier cannot be used with 'sink' or 'source' depend type">;
def err_omp_depend_modifier_not_iterator : Error<
"expected iterator specification as depend modifier">;
def err_omp_map_modifier_not_iterator : Error<
"expected iterator specification as map modifier">;
def err_omp_linear_ordered : Error<
"'linear' clause cannot be specified along with 'ordered' clause with a parameter">;
def err_omp_unexpected_schedule_modifier : Error<
"modifier '%0' cannot be used along with modifier '%1'">;
def err_omp_schedule_nonmonotonic_static : Error<
"'nonmonotonic' modifier can only be specified with 'dynamic' or 'guided' schedule kind">;
def err_omp_simple_clause_incompatible_with_ordered : Error<
"'%0' clause with '%1' modifier cannot be specified if an 'ordered' clause is specified">;
def err_omp_ordered_simd : Error<
"'ordered' clause with a parameter can not be specified in '#pragma omp %0' directive">;
def err_omp_variable_in_given_clause_and_dsa : Error<
"%0 variable cannot be in a %1 clause in '#pragma omp %2' directive">;
def err_omp_param_or_this_in_clause : Error<
"expected reference to one of the parameters of function %0%select{| or 'this'}1">;
def err_omp_expected_uniform_param : Error<
"expected a reference to a parameter specified in a 'uniform' clause">;
def err_omp_expected_int_param : Error<
"expected a reference to an integer-typed parameter">;
def err_omp_at_least_one_motion_clause_required : Error<
"expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'">;
def err_omp_cannot_update_with_internal_linkage : Error<
"the host cannot update a declare target variable that is not externally visible.">;
def err_omp_usedeviceptr_not_a_pointer : Error<
"expected pointer or reference to pointer in 'use_device_ptr' clause">;
def err_omp_argument_type_isdeviceptr : Error <
"expected pointer, array, reference to pointer, or reference to array in 'is_device_ptr clause'">;
def warn_omp_nesting_simd : Warning<
"OpenMP only allows an ordered construct with the simd clause nested in a simd construct">,
InGroup<SourceUsesOpenMP>;
def err_omp_orphaned_device_directive : Error<
"orphaned 'omp %0' directives are prohibited"
"; perhaps you forget to enclose the directive into a "
"%select{|||target |teams|for, simd, for simd, parallel for, or parallel for simd }1region?">;
def err_omp_reduction_non_addressable_expression : Error<
"expected addressable reduction item for the task-based directives">;
def err_omp_reduction_with_nogroup : Error<
"'reduction' clause cannot be used with 'nogroup' clause">;
def err_omp_reduction_vla_unsupported : Error<
"cannot generate code for reduction on %select{|array section, which requires a }0variable length array">;
def err_omp_linear_distribute_var_non_loop_iteration : Error<
"only loop iteration variables are allowed in 'linear' clause in distribute directives">;
def warn_omp_non_trivial_type_mapped : Warning<
"Type %0 is not trivially copyable and not guaranteed to be mapped correctly">,
InGroup<OpenMPMapping>;
def err_omp_requires_clause_redeclaration : Error <
"Only one %0 clause can appear on a requires directive in a single translation unit">;
def note_omp_requires_previous_clause : Note <
"%0 clause previously used here">;
def err_omp_directive_before_requires : Error <
"'%0' region encountered before requires directive with '%1' clause">;
def note_omp_requires_encountered_directive : Note <
"'%0' previously encountered here">;
def err_omp_device_ancestor_without_requires_reverse_offload : Error <
"Device clause with ancestor device-modifier used without specifying 'requires reverse_offload'">;
def err_omp_invalid_scope : Error <
"'#pragma omp %0' directive must appear only in file scope">;
def note_omp_invalid_length_on_this_ptr_mapping : Note <
"expected length on mapping of 'this' array section expression to be '1'">;
def note_omp_invalid_lower_bound_on_this_ptr_mapping : Note <
"expected lower bound on mapping of 'this' array section expression to be '0' or not specified">;
def note_omp_invalid_subscript_on_this_ptr_map : Note <
"expected 'this' subscript expression on map clause to be 'this[0]'">;
def err_omp_invalid_map_this_expr : Error <
"invalid 'this' expression on 'map' clause">;
def err_omp_implied_type_not_found : Error<
"'%0' type not found; include <omp.h>">;
def err_omp_expected_omp_depend_t_lvalue : Error<
"expected lvalue expression%select{ of 'omp_depend_t' type, not %1|}0">;
def err_omp_depobj_expected : Error<
"expected depobj expression">;
def err_omp_depobj_single_clause_expected : Error<
"exactly one of 'depend', 'destroy', or 'update' clauses is expected">;
def err_omp_scan_single_clause_expected : Error<
"exactly one of 'inclusive' or 'exclusive' clauses is expected">;
def err_omp_inclusive_exclusive_not_reduction : Error<
"the list item must appear in 'reduction' clause with the 'inscan' modifier "
"of the parent directive">;
def err_omp_reduction_not_inclusive_exclusive : Error<
"the inscan reduction list item must appear as a list item in an 'inclusive' or"
" 'exclusive' clause on an inner 'omp scan' directive">;
def err_omp_wrong_inscan_reduction : Error<
"'inscan' modifier can be used only in 'omp for', 'omp simd', 'omp for simd',"
" 'omp parallel for', or 'omp parallel for simd' directive">;
def err_omp_inscan_reduction_expected : Error<
"expected 'reduction' clause with the 'inscan' modifier">;
def note_omp_previous_inscan_reduction : Note<
"'reduction' clause with 'inscan' modifier is used here">;
def err_omp_expected_predefined_allocator : Error<
"expected one of the predefined allocators for the variables with the static "
"storage: 'omp_default_mem_alloc', 'omp_large_cap_mem_alloc', "
"'omp_const_mem_alloc', 'omp_high_bw_mem_alloc', 'omp_low_lat_mem_alloc', "
"'omp_cgroup_mem_alloc', 'omp_pteam_mem_alloc' or 'omp_thread_mem_alloc'">;
def warn_omp_used_different_allocator : Warning<
"allocate directive specifies %select{default|'%1'}0 allocator while "
"previously used %select{default|'%3'}2">,
InGroup<OpenMPClauses>;
def note_omp_previous_allocator : Note<
"previous allocator is specified here">;
def err_expected_allocator_clause : Error<"expected an 'allocator' clause "
"inside of the target region; provide an 'allocator' clause or use 'requires'"
" directive with the 'dynamic_allocators' clause">;
def err_expected_allocator_expression : Error<"expected an allocator expression "
"inside of the target region; provide an allocator expression or use 'requires'"
" directive with the 'dynamic_allocators' clause">;
def warn_omp_allocate_thread_on_task_target_directive : Warning<
"allocator with the 'thread' trait access has unspecified behavior on '%0' directive">,
InGroup<OpenMPClauses>;
def err_omp_expected_private_copy_for_allocate : Error<
"the referenced item is not found in any private clause on the same directive">;
def err_omp_stmt_depends_on_loop_counter : Error<
"the loop %select{initializer|condition}0 expression depends on the current loop control variable">;
def err_omp_invariant_dependency : Error<
"expected loop invariant expression">;
def err_omp_invariant_or_linear_dependency : Error<
"expected loop invariant expression or '<invariant1> * %0 + <invariant2>' kind of expression">;
def err_omp_wrong_dependency_iterator_type : Error<
"expected an integer or a pointer type of the outer loop counter '%0' for non-rectangular nests">;
def err_target_unsupported_type
: Error<"%0 requires %select{|%2 bit size}1 %3 %select{|return }4type support,"
" but target '%5' does not support it">;
def err_omp_lambda_capture_in_declare_target_not_to : Error<
"variable captured in declare target region must appear in a to clause">;
def err_omp_device_type_mismatch : Error<
"'device_type(%0)' does not match previously specified 'device_type(%1)' for the same declaration">;
def err_omp_wrong_device_function_call : Error<
"function with 'device_type(%0)' is not available on %select{device|host}1">;
def note_omp_marked_device_type_here : Note<"marked as 'device_type(%0)' here">;
def warn_omp_declare_target_after_first_use : Warning<
"declaration marked as declare target after first use, it may lead to incorrect results">,
InGroup<OpenMPTarget>;
def err_omp_declare_variant_incompat_attributes : Error<
"'#pragma omp declare variant' is not compatible with any target-specific attributes">;
def warn_omp_declare_variant_score_not_constant
: Warning<"score expressions in the OpenMP context selector need to be "
"constant; %0 is not and will be ignored">,
InGroup<SourceUsesOpenMP>;
def err_omp_declare_variant_user_condition_not_constant
: Error<"the user condition in the OpenMP context selector needs to be "
"constant; %0 is not">;
def warn_omp_declare_variant_after_used : Warning<
"'#pragma omp declare variant' cannot be applied for function after first "
"usage; the original function might be used">, InGroup<SourceUsesOpenMP>;
def warn_omp_declare_variant_after_emitted : Warning<
"'#pragma omp declare variant' cannot be applied to the function that was defined already;"
" the original function might be used">, InGroup<SourceUsesOpenMP>;
def err_omp_declare_variant_doesnt_support : Error<
"'#pragma omp declare variant' does not "
"support %select{function templates|virtual functions|"
"deduced return types|constructors|destructors|deleted functions|"
"defaulted functions|constexpr functions|consteval function}0">;
def err_omp_declare_variant_diff : Error<
"function with '#pragma omp declare variant' has a different %select{calling convention"
"|return type|constexpr specification|inline specification|storage class|"
"linkage}0">;
def err_omp_declare_variant_prototype_required : Error<
"function with '#pragma omp declare variant' must have a prototype when "
"'append_args' is used">;
def err_omp_interop_type_not_found : Error<
"'omp_interop_t' must be defined when 'append_args' clause is used; include <omp.h>">;
def err_omp_declare_variant_incompat_types : Error<
"variant in '#pragma omp declare variant' with type %0 is incompatible with"
" type %1%select{| with appended arguments}2">;
def err_omp_declare_variant_same_base_function : Error<
"variant in '#pragma omp declare variant' is the same as the base function">;
def warn_omp_declare_variant_marked_as_declare_variant : Warning<
"variant function in '#pragma omp declare variant' is itself marked as '#pragma omp declare variant'"
>, InGroup<SourceUsesOpenMP>;
def note_omp_marked_declare_variant_here : Note<"marked as 'declare variant' here">;
def err_omp_one_defaultmap_each_category: Error<
"at most one defaultmap clause for each variable-category can appear on the directive">;
def err_omp_lastprivate_conditional_non_scalar : Error<
"expected list item of scalar type in 'lastprivate' clause with 'conditional' modifier"
>;
def err_omp_flush_order_clause_and_list : Error<
"'flush' directive with memory order clause '%0' cannot have the list">;
def note_omp_flush_order_clause_here : Note<
"memory order clause '%0' is specified here">;
def err_omp_non_lvalue_in_map_or_motion_clauses: Error<
"expected addressable lvalue in '%0' clause">;
def err_omp_var_expected : Error<
"expected variable of the '%0' type%select{|, not %2}1">;
def err_omp_non_pointer_type_array_shaping_base : Error<
"expected expression with a pointer to a complete type as a base of an array "
"shaping operation">;
def err_omp_reduction_task_not_parallel_or_worksharing : Error<
"'reduction' clause with 'task' modifier allowed only on non-simd parallel or"
" worksharing constructs">;
def err_omp_expected_array_alloctraits : Error<
"expected constant sized array of 'omp_alloctrait_t' elements, not %0">;
def err_omp_predefined_allocator_with_traits : Error<
"predefined allocator cannot have traits specified">;
def note_omp_predefined_allocator : Note<
"predefined trait '%0' used here">;
def err_omp_nonpredefined_allocator_without_traits : Error<
"non-predefined allocator must have traits specified">;
def err_omp_allocator_used_in_clauses : Error<
"allocators used in 'uses_allocators' clause cannot appear in other "
"data-sharing or data-mapping attribute clauses">;
def err_omp_allocator_not_in_uses_allocators : Error<
"allocator must be specified in the 'uses_allocators' clause">;
def note_omp_protected_structured_block
: Note<"jump bypasses OpenMP structured block">;
def note_omp_exits_structured_block
: Note<"jump exits scope of OpenMP structured block">;
def err_omp_lastprivate_loop_var_non_loop_iteration : Error<
"only loop iteration variables are allowed in 'lastprivate' clause in "
"'omp %0' directives">;
def err_omp_interop_variable_expected : Error<
"expected%select{| non-const}0 variable of type 'omp_interop_t'">;
def err_omp_interop_variable_wrong_type : Error<
"interop variable must be of type 'omp_interop_t'">;
def err_omp_interop_prefer_type : Error<
"prefer_list item must be a string literal or constant integral "
"expression">;
def err_omp_interop_bad_depend_clause : Error<
"'depend' clause requires the 'targetsync' interop type">;
def err_omp_interop_var_multiple_actions : Error<
"interop variable %0 used in multiple action clauses">;
def err_omp_dispatch_statement_call
: Error<"statement after '#pragma omp dispatch' must be a direct call"
" to a target function or an assignment to one">;
def err_omp_unroll_full_variable_trip_count : Error<
"loop to be fully unrolled must have a constant trip count">;
def note_omp_directive_here : Note<"'%0' directive found here">;
def err_omp_instantiation_not_supported
: Error<"instantiation of '%0' not supported yet">;
def err_omp_adjust_arg_multiple_clauses : Error<
"'adjust_arg' argument %0 used in multiple clauses">;
def err_omp_clause_requires_dispatch_construct : Error<
"'%0' clause requires 'dispatch' context selector">;
def err_omp_append_args_with_varargs : Error<
"'append_args' is not allowed with varargs functions">;
def err_openmp_vla_in_task_untied : Error<
"variable length arrays are not supported in OpenMP tasking regions with 'untied' clause">;
def warn_omp_unterminated_declare_target : Warning<
"expected '#pragma omp end declare target' at end of file to match '#pragma omp %0'">,
InGroup<SourceUsesOpenMP>;
} // end of OpenMP category
let CategoryName = "Related Result Type Issue" in {
// Objective-C related result type compatibility
def warn_related_result_type_compatibility_class : Warning<
"method is expected to return an instance of its class type "
"%diff{$, but is declared to return $|"
", but is declared to return different type}0,1">;
def warn_related_result_type_compatibility_protocol : Warning<
"protocol method is expected to return an instance of the implementing "
"class, but is declared to return %0">;
def note_related_result_type_family : Note<
"%select{overridden|current}0 method is part of the '%select{|alloc|copy|init|"
"mutableCopy|new|autorelease|dealloc|finalize|release|retain|retainCount|"
"self}1' method family%select{| and is expected to return an instance of its "
"class type}0">;
def note_related_result_type_overridden : Note<
"overridden method returns an instance of its class type">;
def note_related_result_type_inferred : Note<
"%select{class|instance}0 method %1 is assumed to return an instance of "
"its receiver type (%2)">;
def note_related_result_type_explicit : Note<
"%select{overridden|current}0 method is explicitly declared 'instancetype'"
"%select{| and is expected to return an instance of its class type}0">;
def err_invalid_type_for_program_scope_var : Error<
"the %0 type cannot be used to declare a program scope variable">;
}
let CategoryName = "Modules Issue" in {
def err_module_decl_in_module_map_module : Error<
"'module' declaration found while building module from module map">;
def err_module_decl_in_header_unit : Error<
"'module' declaration found while building header unit">;
def err_module_interface_implementation_mismatch : Error<
"missing 'export' specifier in module declaration while "
"building module interface">;
def err_current_module_name_mismatch : Error<
"module name '%0' specified on command line does not match name of module">;
def err_module_redefinition : Error<
"redefinition of module '%0'">;
def note_prev_module_definition : Note<"previously defined here">;
def note_prev_module_definition_from_ast_file : Note<"module loaded from '%0'">;
def err_module_not_defined : Error<
"definition of module '%0' is not available; use -fmodule-file= to specify "
"path to precompiled module interface">;
def err_module_redeclaration : Error<
"translation unit contains multiple module declarations">;
def note_prev_module_declaration : Note<"previous module declaration is here">;
def err_module_declaration_missing : Error<
"missing 'export module' declaration in module interface unit">;
def err_module_declaration_missing_after_global_module_introducer : Error<
"missing 'module' declaration at end of global module fragment "
"introduced here">;
def err_module_private_specialization : Error<
"%select{template|partial|member}0 specialization cannot be "
"declared __module_private__">;
def err_module_private_local : Error<
"%select{local variable|parameter|typedef}0 %1 cannot be declared "
"__module_private__">;
def err_module_private_local_class : Error<
"local %select{struct|interface|union|class|enum}0 cannot be declared "
"__module_private__">;
def err_module_unimported_use : Error<
"%select{declaration|definition|default argument|"
"explicit specialization|partial specialization}0 of %1 must be imported "
"from module '%2' before it is required">;
def err_module_unimported_use_header : Error<
"%select{missing '#include'|missing '#include %3'}2; "
"%select{||default argument of |explicit specialization of |"
"partial specialization of }0%1 must be "
"%select{declared|defined|defined|declared|declared}0 "
"before it is used">;
def err_module_unimported_use_multiple : Error<
"%select{declaration|definition|default argument|"
"explicit specialization|partial specialization}0 of %1 must be imported "
"from one of the following modules before it is required:%2">;
def note_unreachable_entity : Note<
"%select{declaration|definition|default argument declared|"
"explicit specialization declared|partial specialization declared}0 here "
"is not %select{visible|reachable|reachable|reachable|reachable|reachable}0">;
def ext_module_import_in_extern_c : ExtWarn<
"import of C++ module '%0' appears within extern \"C\" language linkage "
"specification">, DefaultError,
InGroup<DiagGroup<"module-import-in-extern-c">>;
def err_module_import_not_at_top_level_fatal : Error<
"import of module '%0' appears within %1">, DefaultFatal;
def ext_module_import_not_at_top_level_noop : ExtWarn<
"redundant #include of module '%0' appears within %1">, DefaultError,
InGroup<DiagGroup<"modules-import-nested-redundant">>;
def note_module_import_not_at_top_level : Note<"%0 begins here">;
def err_module_self_import : Error<
"import of module '%0' appears within same top-level module '%1'">;
def err_module_self_import_cxx20 : Error<
"import of module '%0' appears within its own %select{interface|implementation}1">;
def err_module_import_in_implementation : Error<
"@import of module '%0' in implementation of '%1'; use #import">;
// C++ Modules
def err_module_decl_not_at_start : Error<
"module declaration must occur at the start of the translation unit">;
def note_global_module_introducer_missing : Note<
"add 'module;' to the start of the file to introduce a "
"global module fragment">;
def err_export_within_anonymous_namespace : Error<
"export declaration appears within anonymous namespace">;
def note_anonymous_namespace : Note<"anonymous namespace begins here">;
def ext_export_no_name_block : ExtWarn<
"ISO C++20 does not permit %select{an empty|a static_assert}0 declaration "
"to appear in an export block">, InGroup<ExportUnnamed>;
def ext_export_no_names : ExtWarn<
"ISO C++20 does not permit a declaration that does not introduce any names "
"to be exported">, InGroup<ExportUnnamed>;
def introduces_no_names : Error<
"declaration does not introduce any names to be exported">;
def note_export : Note<"export block begins here">;
def err_export_no_name : Error<
"%select{empty|static_assert|asm}0 declaration cannot be exported">;
def ext_export_using_directive : ExtWarn<
"ISO C++20 does not permit using directive to be exported">,
InGroup<DiagGroup<"export-using-directive">>;
def err_export_within_export : Error<
"export declaration appears within another export declaration">;
def err_export_internal : Error<
"declaration of %0 with internal linkage cannot be exported">;
def err_export_using_internal : Error<
"using declaration referring to %1 with %select{internal|module|unknown}0 "
"linkage cannot be exported">;
def err_export_not_in_module_interface : Error<
"export declaration can only be used within a module interface unit"
"%select{ after the module declaration|}0">;
def err_export_inline_not_defined : Error<
"inline function not defined%select{| before the private module fragment}0">;
def err_export_partition_impl : Error<
"module partition implementations cannot be exported">;
def err_export_in_private_module_fragment : Error<
"export declaration cannot be used in a private module fragment">;
def note_private_module_fragment : Note<
"private module fragment begins here">;
def err_private_module_fragment_not_module : Error<
"private module fragment declaration with no preceding module declaration">;
def err_private_module_fragment_redefined : Error<
"private module fragment redefined">;
def err_private_module_fragment_not_module_interface : Error<
"private module fragment in module implementation unit">;
def note_not_module_interface_add_export : Note<
"add 'export' here if this is intended to be a module interface unit">;
def err_invalid_module_name : Error<
"%0 is %select{an invalid|a reserved}1 name for a module">;
def err_extern_def_in_header_unit : Error<
"non-inline external definitions are not permitted in C++ header units">;
def ext_equivalent_internal_linkage_decl_in_modules : ExtWarn<
"ambiguous use of internal linkage declaration %0 defined in multiple modules">,
InGroup<DiagGroup<"modules-ambiguous-internal-linkage">>;
def note_equivalent_internal_linkage_decl : Note<
"declared here%select{ in module '%1'|}0">;
def note_redefinition_modules_same_file : Note<
"'%0' included multiple times, additional include site in header from module '%1'">;
def note_redefinition_include_same_file : Note<
"'%0' included multiple times, additional include site here">;
}
let CategoryName = "Coroutines Issue" in {
def err_return_in_coroutine : Error<
"return statement not allowed in coroutine; did you mean 'co_return'?">;
def note_declared_coroutine_here : Note<
"function is a coroutine due to use of '%0' here">;
def err_coroutine_objc_method : Error<
"Objective-C methods as coroutines are not yet supported">;
def err_coroutine_unevaluated_context : Error<
"'%0' cannot be used in an unevaluated context">;
def err_coroutine_within_handler : Error<
"'%0' cannot be used in the handler of a try block">;
def err_coroutine_outside_function : Error<
"'%0' cannot be used outside a function">;
def err_coroutine_invalid_func_context : Error<
"'%1' cannot be used in %select{a constructor|a destructor"
"|the 'main' function|a constexpr function"
"|a function with a deduced return type|a varargs function"
"|a consteval function}0">;
def err_implied_coroutine_type_not_found : Error<
"%0 type was not found; include <coroutine> before defining "
"a coroutine; include <experimental/coroutine> if your version "
"of libcxx is less than 14.0">;
def warn_deprecated_coroutine_namespace : Warning<
"support for 'std::experimental::%0' will be removed in Clang 17; "
"use 'std::%0' instead">,
InGroup<DeprecatedExperimentalCoroutine>;
def err_mixed_use_std_and_experimental_namespace_for_coroutine : Error<
"conflicting mixed use of std and std::experimental namespaces for "
"coroutine components">;
def err_implicit_coroutine_std_nothrow_type_not_found : Error<
"std::nothrow was not found; include <new> before defining a coroutine which "
"uses get_return_object_on_allocation_failure()">;
def err_malformed_std_nothrow : Error<
"std::nothrow must be a valid variable declaration">;
def err_malformed_std_coroutine_handle : Error<
"std::coroutine_handle isn't a class template">;
def err_coroutine_handle_missing_member : Error<
"std::coroutine_handle must have a member named '%0'">;
def err_malformed_std_coroutine_traits : Error<
"std::coroutine_traits isn't a class template">;
def err_implied_std_coroutine_traits_promise_type_not_found : Error<
"this function cannot be a coroutine: %q0 has no member named 'promise_type'">;
def err_implied_std_coroutine_traits_promise_type_not_class : Error<
"this function cannot be a coroutine: %0 is not a class">;
def err_coroutine_promise_type_incomplete : Error<
"this function cannot be a coroutine: %0 is an incomplete type">;
def err_coroutine_type_missing_specialization : Error<
"this function cannot be a coroutine: missing definition of "
"specialization %0">;
def err_coroutine_promise_incompatible_return_functions : Error<
"the coroutine promise type %0 declares both 'return_value' and 'return_void'">;
def note_coroutine_promise_implicit_await_transform_required_here : Note<
"call to 'await_transform' implicitly required by 'co_await' here">;
def note_coroutine_promise_suspend_implicitly_required : Note<
"call to '%select{initial_suspend|final_suspend}0' implicitly "
"required by the %select{initial suspend point|final suspend point}0">;
def err_coroutine_promise_unhandled_exception_required : Error<
"%0 is required to declare the member 'unhandled_exception()'">;
def warn_coroutine_promise_unhandled_exception_required_with_exceptions : Warning<
"%0 is required to declare the member 'unhandled_exception()' when exceptions are enabled">,
InGroup<CoroutineMissingUnhandledException>;
def err_coroutine_promise_get_return_object_on_allocation_failure : Error<
"%0: 'get_return_object_on_allocation_failure()' must be a static member function">;
def err_seh_in_a_coroutine_with_cxx_exceptions : Error<
"cannot use SEH '__try' in a coroutine when C++ exceptions are enabled">;
def err_coroutine_promise_new_requires_nothrow : Error<
"%0 is required to have a non-throwing noexcept specification when the promise "
"type declares 'get_return_object_on_allocation_failure()'">;
def note_coroutine_promise_call_implicitly_required : Note<
"call to %0 implicitly required by coroutine function here">;
def err_await_suspend_invalid_return_type : Error<
"return type of 'await_suspend' is required to be 'void' or 'bool' (have %0)"
>;
def note_await_ready_no_bool_conversion : Note<
"return type of 'await_ready' is required to be contextually convertible to 'bool'"
>;
def warn_coroutine_handle_address_invalid_return_type : Warning <
"return type of 'coroutine_handle<>::address should be 'void*' (have %0) in order to get capability with existing async C API.">,
InGroup<Coroutine>;
def err_coroutine_promise_final_suspend_requires_nothrow : Error<
"the expression 'co_await __promise.final_suspend()' is required to be non-throwing"
>;
def note_coroutine_function_declare_noexcept : Note<
"must be declared with 'noexcept'"
>;
def warn_always_inline_coroutine : Warning<
"this coroutine may be split into pieces; not every piece is guaranteed to be inlined"
>,
InGroup<AlwaysInlineCoroutine>;
def err_coroutine_unusable_new : Error<
"'operator new' provided by %0 is not usable with the function signature of %1"
>;
def err_coroutine_unfound_nothrow_new : Error <
"unable to find %select{'::operator new(size_t, nothrow_t)'|"
"'::operator new(size_t, align_val_t, nothrow_t)'}1 for %0"
>;
def warn_non_aligned_allocation_function : Warning <
"under -fcoro-aligned-allocation, the non-aligned allocation function "
"for the promise type %0 has higher precedence than the global aligned "
"allocation function">,
InGroup<CoroNonAlignedAllocationFunction>;
def err_conflicting_aligned_options : Error <
"conflicting option '-fcoro-aligned-allocation' and '-fno-aligned-allocation'"
>;
def err_coro_invalid_addr_of_label : Error<
"the GNU address of label extension is not allowed in coroutines."
>;
} // end of coroutines issue category
let CategoryName = "Documentation Issue" in {
def warn_not_a_doxygen_trailing_member_comment : Warning<
"not a Doxygen trailing comment">, InGroup<Documentation>, DefaultIgnore;
} // end of documentation issue category
let CategoryName = "Nullability Issue" in {
def warn_mismatched_nullability_attr : Warning<
"nullability specifier %0 conflicts with existing specifier %1">,
InGroup<Nullability>;
def warn_nullability_declspec : Warning<
"nullability specifier %0 cannot be applied "
"to non-pointer type %1; did you mean to apply the specifier to the "
"%select{pointer|block pointer|member pointer|function pointer|"
"member function pointer}2?">,
InGroup<NullabilityDeclSpec>,
DefaultError;
def note_nullability_here : Note<"%0 specified here">;
def err_nullability_nonpointer : Error<
"nullability specifier %0 cannot be applied to non-pointer type %1">;
def warn_nullability_lost : Warning<
"implicit conversion from nullable pointer %0 to non-nullable pointer "
"type %1">,
InGroup<NullableToNonNullConversion>, DefaultIgnore;
def warn_zero_as_null_pointer_constant : Warning<
"zero as null pointer constant">,
InGroup<DiagGroup<"zero-as-null-pointer-constant">>, DefaultIgnore;
def err_nullability_cs_multilevel : Error<
"nullability keyword %0 cannot be applied to multi-level pointer type %1">;
def note_nullability_type_specifier : Note<
"use nullability type specifier %0 to affect the innermost "
"pointer type of %1">;
def warn_null_resettable_setter : Warning<
"synthesized setter %0 for null_resettable property %1 does not handle nil">,
InGroup<Nullability>;
def warn_nullability_missing : Warning<
"%select{pointer|block pointer|member pointer}0 is missing a nullability "
"type specifier (_Nonnull, _Nullable, or _Null_unspecified)">,
InGroup<NullabilityCompleteness>;
def warn_nullability_missing_array : Warning<
"array parameter is missing a nullability type specifier (_Nonnull, "
"_Nullable, or _Null_unspecified)">,
InGroup<NullabilityCompletenessOnArrays>;
def note_nullability_fix_it : Note<
"insert '%select{_Nonnull|_Nullable|_Null_unspecified}0' if the "
"%select{pointer|block pointer|member pointer|array parameter}1 "
"%select{should never be null|may be null|should not declare nullability}0">;
def warn_nullability_inferred_on_nested_type : Warning<
"inferring '_Nonnull' for pointer type within %select{array|reference}0 is "
"deprecated">,
InGroup<NullabilityInferredOnNestedType>;
def err_objc_type_arg_explicit_nullability : Error<
"type argument %0 cannot explicitly specify nullability">;
def err_objc_type_param_bound_explicit_nullability : Error<
"type parameter %0 bound %1 cannot explicitly specify nullability">;
}
let CategoryName = "Generics Issue" in {
def err_objc_type_param_bound_nonobject : Error<
"type bound %0 for type parameter %1 is not an Objective-C pointer type">;
def err_objc_type_param_bound_missing_pointer : Error<
"missing '*' in type bound %0 for type parameter %1">;
def err_objc_type_param_bound_qualified : Error<
"type bound %1 for type parameter %0 cannot be qualified with '%2'">;
def err_objc_type_param_redecl : Error<
"redeclaration of type parameter %0">;
def err_objc_type_param_arity_mismatch : Error<
"%select{forward class declaration|class definition|category|extension}0 has "
"too %select{few|many}1 type parameters (expected %2, have %3)">;
def err_objc_type_param_bound_conflict : Error<
"type bound %0 for type parameter %1 conflicts with "
"%select{implicit|previous}2 bound %3%select{for type parameter %5|}4">;
def err_objc_type_param_variance_conflict : Error<
"%select{in|co|contra}0variant type parameter %1 conflicts with previous "
"%select{in|co|contra}2variant type parameter %3">;
def note_objc_type_param_here : Note<"type parameter %0 declared here">;
def err_objc_type_param_bound_missing : Error<
"missing type bound %0 for type parameter %1 in %select{@interface|@class}2">;
def err_objc_parameterized_category_nonclass : Error<
"%select{extension|category}0 of non-parameterized class %1 cannot have type "
"parameters">;
def err_objc_parameterized_forward_class : Error<
"forward declaration of non-parameterized class %0 cannot have type "
"parameters">;
def err_objc_parameterized_forward_class_first : Error<
"class %0 previously declared with type parameters">;
def err_objc_type_arg_missing_star : Error<
"type argument %0 must be a pointer (requires a '*')">;
def err_objc_type_arg_qualified : Error<
"type argument %0 cannot be qualified with '%1'">;
def err_objc_type_arg_missing : Error<
"no type or protocol named %0">;
def err_objc_type_args_and_protocols : Error<
"angle brackets contain both a %select{type|protocol}0 (%1) and a "
"%select{protocol|type}0 (%2)">;
def err_objc_type_args_non_class : Error<
"type arguments cannot be applied to non-class type %0">;
def err_objc_type_args_non_parameterized_class : Error<
"type arguments cannot be applied to non-parameterized class %0">;
def err_objc_type_args_specialized_class : Error<
"type arguments cannot be applied to already-specialized class type %0">;
def err_objc_type_args_wrong_arity : Error<
"too %select{many|few}0 type arguments for class %1 (have %2, expected %3)">;
}
def err_objc_type_arg_not_id_compatible : Error<
"type argument %0 is neither an Objective-C object nor a block type">;
def err_objc_type_arg_does_not_match_bound : Error<
"type argument %0 does not satisfy the bound (%1) of type parameter %2">;
def warn_objc_redundant_qualified_class_type : Warning<
"parameterized class %0 already conforms to the protocols listed; did you "
"forget a '*'?">, InGroup<ObjCProtocolQualifiers>;
def warn_block_literal_attributes_on_omitted_return_type : Warning<
"attribute %0 ignored, because it cannot be applied to omitted return type">,
InGroup<IgnoredAttributes>;
def warn_block_literal_qualifiers_on_omitted_return_type : Warning<
"'%0' qualifier on omitted return type %1 has no effect">,
InGroup<IgnoredQualifiers>;
def warn_shadow_field : Warning<
"%select{parameter|non-static data member}3 %0 %select{|of %1 }3shadows "
"member inherited from type %2">, InGroup<ShadowField>, DefaultIgnore;
def note_shadow_field : Note<"declared here">;
def err_multiversion_required_in_redecl : Error<
"function declaration is missing %select{'target'|'cpu_specific' or "
"'cpu_dispatch'|'target_version'}0 attribute in a multiversioned function">;
def note_multiversioning_caused_here : Note<
"function multiversioning caused by this declaration">;
def err_multiversion_after_used : Error<
"function declaration cannot become a multiversioned function after first "
"usage">;
def err_bad_multiversion_option : Error<
"function multiversioning doesn't support %select{feature|architecture}0 "
"'%1'">;
def err_multiversion_duplicate : Error<
"multiversioned function redeclarations require identical target attributes">;
def err_multiversion_noproto : Error<
"multiversioned function must have a prototype">;
def err_multiversion_disallowed_other_attr
: Error<"attribute "
"'%select{|target|cpu_specific|cpu_dispatch|target_clones|target_version}0' "
"multiversioning cannot be combined"
" with attribute %1">;
def err_multiversion_diff : Error<
"multiversioned function declaration has a different %select{calling convention"
"|return type|constexpr specification|inline specification|linkage|"
"language linkage}0">;
def err_multiversion_doesnt_support
: Error<"attribute "
"'%select{|target|cpu_specific|cpu_dispatch|target_clones|target_version}0' "
"multiversioned functions do not "
"yet support %select{function templates|virtual functions|"
"deduced return types|constructors|destructors|deleted functions|"
"defaulted functions|constexpr functions|consteval "
"function|lambdas}1">;
def err_multiversion_not_allowed_on_main : Error<
"'main' cannot be a multiversioned function">;
def err_multiversion_not_supported : Error<
"function multiversioning is not supported on the current target">;
def err_multiversion_types_mixed : Error<
"multiversioning attributes cannot be combined">;
def err_cpu_dispatch_mismatch : Error<
"'cpu_dispatch' function redeclared with different CPUs">;
def err_cpu_specific_multiple_defs : Error<
"multiple 'cpu_specific' functions cannot specify the same CPU: %0">;
def warn_multiversion_duplicate_entries : Warning<
"CPU list contains duplicate entries; attribute ignored">,
InGroup<FunctionMultiVersioning>;
def warn_dispatch_body_ignored : Warning<
"body of cpu_dispatch function will be ignored">,
InGroup<FunctionMultiVersioning>;
def err_target_clone_must_have_default
: Error<"'target_clones' multiversioning requires a default target">;
def err_target_clone_doesnt_match
: Error<"'target_clones' attribute does not match previous declaration">;
def warn_target_clone_mixed_values
: ExtWarn<
"mixing 'target_clones' specifier mechanisms is permitted for GCC "
"compatibility; use a comma separated sequence of string literals, "
"or a string literal containing a comma-separated list of versions">,
InGroup<TargetClonesMixedSpecifiers>;
def warn_target_clone_duplicate_options
: Warning<"version list contains duplicate entries">,
InGroup<FunctionMultiVersioning>;
def warn_target_clone_no_impact_options
: Warning<"version list contains entries that don't impact code generation">,
InGroup<FunctionMultiVersioning>;
// three-way comparison operator diagnostics
def err_implied_comparison_category_type_not_found : Error<
"cannot %select{use builtin operator '<=>'|default 'operator<=>'}1 "
"because type '%0' was not found; include <compare>">;
def err_spaceship_argument_narrowing : Error<
"argument to 'operator<=>' "
"%select{cannot be narrowed from type %1 to %2|"
"evaluates to %1, which cannot be narrowed to type %2}0">;
def err_std_compare_type_not_supported : Error<
"standard library implementation of %0 is not supported; "
"%select{member '%2' does not have expected form|"
"member '%2' is missing|"
"the type is not trivially copyable|"
"the type does not have the expected form}1">;
def note_rewriting_operator_as_spaceship : Note<
"while rewriting comparison as call to 'operator<=>' declared here">;
def err_three_way_vector_comparison : Error<
"three-way comparison between vectors is not supported">;
// Memory Tagging Extensions (MTE) diagnostics
def err_memtag_arg_null_or_pointer : Error<
"%0 argument of MTE builtin function must be a null or a pointer (%1 invalid)">;
def err_memtag_any2arg_pointer : Error<
"at least one argument of MTE builtin function must be a pointer (%0, %1 invalid)">;
def err_memtag_arg_must_be_pointer : Error<
"%0 argument of MTE builtin function must be a pointer (%1 invalid)">;
def err_memtag_arg_must_be_integer : Error<
"%0 argument of MTE builtin function must be an integer type (%1 invalid)">;
def warn_dereference_of_noderef_type : Warning<
"dereferencing %0; was declared with a 'noderef' type">, InGroup<NoDeref>;
def warn_dereference_of_noderef_type_no_decl : Warning<
"dereferencing expression marked as 'noderef'">, InGroup<NoDeref>;
def warn_noderef_on_non_pointer_or_array : Warning<
"'noderef' can only be used on an array or pointer type">, InGroup<IgnoredAttributes>;
def warn_noderef_to_dereferenceable_pointer : Warning<
"casting to dereferenceable pointer removes 'noderef' attribute">, InGroup<NoDeref>;
def err_builtin_launder_invalid_arg : Error<
"%select{non-pointer|function pointer|void pointer}0 argument to "
"'__builtin_launder' is not allowed">;
def err_builtin_invalid_arg_type: Error <
"%ordinal0 argument must be a "
"%select{vector, integer or floating point type|matrix|"
"pointer to a valid matrix element type|"
"signed integer or floating point type|vector type|"
"floating point type|"
"vector of integers}1 (was %2)">;
def err_builtin_matrix_disabled: Error<
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;
def err_matrix_index_not_integer: Error<
"matrix %select{row|column}0 index is not an integer">;
def err_matrix_index_outside_range: Error<
"matrix %select{row|column}0 index is outside the allowed range [0, %1)">;
def err_matrix_incomplete_index: Error<
"single subscript expressions are not allowed for matrix values">;
def err_matrix_separate_incomplete_index: Error<
"matrix row and column subscripts cannot be separated by any expression">;
def err_matrix_subscript_comma: Error<
"comma expressions are not allowed as indices in matrix subscript expressions">;
def err_builtin_matrix_scalar_unsigned_arg: Error<
"%0 argument must be a constant unsigned integer expression">;
def err_builtin_matrix_pointer_arg_mismatch: Error<
"the pointee of the 2nd argument must match the element type of the 1st argument (%0 != %1)">;
def err_builtin_matrix_store_to_const: Error<
"cannot store matrix to read-only pointer">;
def err_builtin_matrix_stride_too_small: Error<
"stride must be greater or equal to the number of rows">;
def err_builtin_matrix_invalid_dimension: Error<
"%0 dimension is outside the allowed range [1, %1]">;
def warn_mismatched_import : Warning<
"import %select{module|name}0 (%1) does not match the import %select{module|name}0 (%2) of the "
"previous declaration">,
InGroup<IgnoredAttributes>;
def warn_import_on_definition : Warning<
"import %select{module|name}0 cannot be applied to a function with a definition">,
InGroup<IgnoredAttributes>;
def err_preserve_field_info_not_field : Error<
"__builtin_preserve_field_info argument %0 not a field access">;
def err_preserve_field_info_not_const: Error<
"__builtin_preserve_field_info argument %0 not a constant">;
def err_btf_type_id_not_const: Error<
"__builtin_btf_type_id argument %0 not a constant">;
def err_preserve_type_info_invalid : Error<
"__builtin_preserve_type_info argument %0 invalid">;
def err_preserve_type_info_not_const: Error<
"__builtin_preserve_type_info argument %0 not a constant">;
def err_preserve_enum_value_invalid : Error<
"__builtin_preserve_enum_value argument %0 invalid">;
def err_preserve_enum_value_not_const: Error<
"__builtin_preserve_enum_value argument %0 not a constant">;
def err_bit_cast_non_trivially_copyable : Error<
"__builtin_bit_cast %select{source|destination}0 type must be trivially copyable">;
def err_bit_cast_type_size_mismatch : Error<
"__builtin_bit_cast source size does not equal destination size (%0 vs %1)">;
// SYCL-specific diagnostics
def warn_sycl_kernel_num_of_template_params : Warning<
"'sycl_kernel' attribute only applies to a function template with at least"
" two template parameters">, InGroup<IgnoredAttributes>;
def warn_sycl_kernel_invalid_template_param_type : Warning<
"template parameter of a function template with the 'sycl_kernel' attribute"
" cannot be a non-type template parameter">, InGroup<IgnoredAttributes>;
def warn_sycl_kernel_num_of_function_params : Warning<
"function template with 'sycl_kernel' attribute must have a single parameter">,
InGroup<IgnoredAttributes>;
def warn_sycl_kernel_return_type : Warning<
"function template with 'sycl_kernel' attribute must have a 'void' return type">,
InGroup<IgnoredAttributes>;
def err_sycl_special_type_num_init_method : Error<
"types with 'sycl_special_class' attribute must have one and only one '__init' "
"method defined">;
def err_bit_int_bad_size : Error<"%select{signed|unsigned}0 _BitInt must "
"have a bit size of at least %select{2|1}0">;
def err_bit_int_max_size : Error<"%select{signed|unsigned}0 _BitInt of bit "
"sizes greater than %1 not supported">;
// errors of expect.with.probability
def err_probability_not_constant_float : Error<
"probability argument to __builtin_expect_with_probability must be constant "
"floating-point expression">;
def err_probability_out_of_range : Error<
"probability argument to __builtin_expect_with_probability is outside the "
"range [0.0, 1.0]">;
// TCB warnings
def err_tcb_conflicting_attributes : Error<
"attributes '%0(\"%2\")' and '%1(\"%2\")' are mutually exclusive">;
def warn_tcb_enforcement_violation : Warning<
"calling %0 is a violation of trusted computing base '%1'">,
InGroup<DiagGroup<"tcb-enforcement">>;
// RISC-V builtin required extension warning
def err_riscv_builtin_requires_extension : Error<
"builtin requires%select{| at least one of the following extensions to be enabled}0: %1">;
def err_riscv_builtin_invalid_lmul : Error<
"LMUL argument must be in the range [0,3] or [5,7]">;
def err_std_source_location_impl_not_found : Error<
"'std::source_location::__impl' was not found; it must be defined before '__builtin_source_location' is called">;
def err_std_source_location_impl_malformed : Error<
"'std::source_location::__impl' must be standard-layout and have only two 'const char *' fields '_M_file_name' and '_M_function_name', and two integral fields '_M_line' and '_M_column'">;
// HLSL Diagnostics
def err_hlsl_attr_unsupported_in_stage : Error<"attribute %0 is unsupported in %select{Pixel|Vertex|Geometry|Hull|Domain|Compute|Library|RayGeneration|Intersection|AnyHit|ClosestHit|Miss|Callable|Mesh|Amplification|Invalid}1 shaders, requires %2">;
def err_hlsl_attr_invalid_type : Error<
"attribute %0 only applies to a field or parameter of type '%1'">;
def err_hlsl_attr_invalid_ast_node : Error<
"attribute %0 only applies to %1">;
def err_hlsl_numthreads_argument_oor : Error<"argument '%select{X|Y|Z}0' to numthreads attribute cannot exceed %1">;
def err_hlsl_numthreads_invalid : Error<"total number of threads cannot exceed %0">;
def err_hlsl_missing_numthreads : Error<"missing numthreads attribute for %0 shader entry">;
def err_hlsl_attribute_param_mismatch : Error<"%0 attribute parameters do not match the previous declaration">;
def err_hlsl_missing_semantic_annotation : Error<
"semantic annotations must be present for all parameters of an entry "
"function or patch constant function">;
def err_hlsl_init_priority_unsupported : Error<
"initializer priorities are not supported in HLSL">;
def err_hlsl_unsupported_register_type : Error<"invalid resource class specifier '%0' used; expected 'b', 's', 't', or 'u'">;
def err_hlsl_unsupported_register_number : Error<"register number should be an integer">;
def err_hlsl_expected_space : Error<"invalid space specifier '%0' used; expected 'space' followed by an integer, like space1">;
def err_hlsl_pointers_unsupported : Error<
"%select{pointers|references}0 are unsupported in HLSL">;
def err_hlsl_operator_unsupported : Error<
"the '%select{&|*|->}0' operator is unsupported in HLSL">;
// Layout randomization diagnostics.
def err_non_designated_init_used : Error<
"a randomized struct can only be initialized with a designated initializer">;
def err_cast_from_randomized_struct : Error<
"casting from randomized structure pointer type %0 to %1">;
// LoongArch-specific Diagnostics
def err_loongarch_builtin_requires_la64 : Error<
"this builtin requires target: loongarch64">;
// Unsafe buffer usage diagnostics.
def warn_unsafe_buffer_variable : Warning<
"%0 is an %select{unsafe pointer used for buffer access|unsafe buffer that "
"does not perform bounds checks}1">,
InGroup<UnsafeBufferUsage>, DefaultIgnore;
def warn_unsafe_buffer_operation : Warning<
"%select{unsafe pointer operation|unsafe pointer arithmetic|"
"unsafe buffer access}0">,
InGroup<UnsafeBufferUsage>, DefaultIgnore;
def note_unsafe_buffer_operation : Note<
"used%select{| in pointer arithmetic| in buffer access}0 here">;
def err_loongarch_builtin_requires_la32 : Error<
"this builtin requires target: loongarch32">;
} // end of sema component.
diff --git a/contrib/llvm-project/clang/include/clang/Sema/Initialization.h b/contrib/llvm-project/clang/include/clang/Sema/Initialization.h
index e5a98ba97f4f..e1bbea0d118d 100644
--- a/contrib/llvm-project/clang/include/clang/Sema/Initialization.h
+++ b/contrib/llvm-project/clang/include/clang/Sema/Initialization.h
@@ -1,1422 +1,1436 @@
//===- Initialization.h - Semantic Analysis for Initializers ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides supporting data types for initialization of objects.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_SEMA_INITIALIZATION_H
#define LLVM_CLANG_SEMA_INITIALIZATION_H
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclAccessPair.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/Ownership.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <cstdint>
#include <string>
namespace clang {
class CXXBaseSpecifier;
class CXXConstructorDecl;
class ObjCMethodDecl;
class Sema;
/// Describes an entity that is being initialized.
class alignas(8) InitializedEntity {
public:
/// Specifies the kind of entity being initialized.
enum EntityKind {
/// The entity being initialized is a variable.
EK_Variable,
/// The entity being initialized is a function parameter.
EK_Parameter,
/// The entity being initialized is a non-type template parameter.
EK_TemplateParameter,
/// The entity being initialized is the result of a function call.
EK_Result,
/// The entity being initialized is the result of a statement expression.
EK_StmtExprResult,
/// The entity being initialized is an exception object that
/// is being thrown.
EK_Exception,
/// The entity being initialized is a non-static data member
/// subobject.
EK_Member,
/// The entity being initialized is an element of an array.
EK_ArrayElement,
/// The entity being initialized is an object (or array of
/// objects) allocated via new.
EK_New,
/// The entity being initialized is a temporary object.
EK_Temporary,
/// The entity being initialized is a base member subobject.
EK_Base,
/// The initialization is being done by a delegating constructor.
EK_Delegating,
/// The entity being initialized is an element of a vector.
EK_VectorElement,
/// The entity being initialized is a field of the block descriptor for
/// the copied-in C++ object.
EK_BlockElement,
/// The entity being initialized is a field of the block descriptor for the
/// copied-in lambda object that's used in the lambda-to-block conversion.
EK_LambdaToBlockConversionBlockElement,
/// The entity being initialized is the real or imaginary part of a
/// complex number.
EK_ComplexElement,
/// The entity being initialized is the field that captures a
/// variable in a lambda.
EK_LambdaCapture,
/// The entity being initialized is the initializer for a compound
/// literal.
EK_CompoundLiteralInit,
/// The entity being implicitly initialized back to the formal
/// result type.
EK_RelatedResult,
/// The entity being initialized is a function parameter; function
/// is member of group of audited CF APIs.
EK_Parameter_CF_Audited,
/// The entity being initialized is a structured binding of a
/// decomposition declaration.
EK_Binding,
+ /// The entity being initialized is a non-static data member subobject of an
+ /// object initialized via parenthesized aggregate initialization.
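+ ///
+ /// A minimal sketch of the C++20 parenthesized aggregate initialization
+ /// (P0960) that this kind models:
+ /// \code
+ ///   struct Point { int x, y; };
+ ///   Point p(1, 2); // p.x and p.y are initialized through entities of
+ ///                  // this kind
+ /// \endcode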
+ EK_ParenAggInitMember,
+
// Note: err_init_conversion_failed in DiagnosticSemaKinds.td uses this
// enum as an index for its first %select. When modifying this list,
// that diagnostic text needs to be updated as well.
};
private:
/// The kind of entity being initialized.
EntityKind Kind;
/// If non-NULL, the parent entity in which this
/// initialization occurs.
const InitializedEntity *Parent = nullptr;
/// The type of the object or reference being initialized.
QualType Type;
/// The mangling number for the next reference temporary to be created.
mutable unsigned ManglingNumber = 0;
struct LN {
/// When Kind == EK_Result, EK_Exception, EK_New, the
/// location of the 'return', 'throw', or 'new' keyword,
/// respectively. When Kind == EK_Temporary, the location where
/// the temporary is being created.
SourceLocation Location;
/// Whether the entity being initialized may end up using the
/// named return value optimization (NRVO).
bool NRVO;
};
struct VD {
/// The VarDecl, FieldDecl, or BindingDecl being initialized.
ValueDecl *VariableOrMember;
/// When Kind == EK_Member, whether this is an implicit member
/// initialization in a copy or move constructor. These can perform array
/// copies.
bool IsImplicitFieldInit;
/// When Kind == EK_Member, whether this is the initial initialization
/// check for a default member initializer.
bool IsDefaultMemberInit;
};
struct C {
/// The name of the variable being captured by an EK_LambdaCapture.
IdentifierInfo *VarID;
/// The source location at which the capture occurs.
SourceLocation Location;
};
union {
/// When Kind == EK_Variable, EK_Member, EK_Binding, or
/// EK_TemplateParameter, the variable, binding, or template parameter.
VD Variable;
/// When Kind == EK_RelatedResult, the Objective-C method whose
/// result type was implicitly changed to accommodate ARC semantics.
ObjCMethodDecl *MethodDecl;
/// When Kind == EK_Parameter, the ParmVarDecl, with the
/// integer indicating whether the parameter is "consumed".
llvm::PointerIntPair<ParmVarDecl *, 1> Parameter;
/// When Kind == EK_Temporary or EK_CompoundLiteralInit, the type
/// source information for the temporary.
TypeSourceInfo *TypeInfo;
struct LN LocAndNRVO;
/// When Kind == EK_Base, the base specifier that provides the
/// base class. The integer specifies whether the base is an inherited
/// virtual base.
llvm::PointerIntPair<const CXXBaseSpecifier *, 1> Base;
/// When Kind == EK_ArrayElement, EK_VectorElement, or
/// EK_ComplexElement, the index of the array or vector element being
/// initialized.
unsigned Index;
struct C Capture;
};
InitializedEntity() {}
/// Create the initialization entity for a variable.
InitializedEntity(VarDecl *Var, EntityKind EK = EK_Variable)
: Kind(EK), Type(Var->getType()), Variable{Var, false, false} {}
/// Create the initialization entity for the result of a
/// function, throwing an object, performing an explicit cast, or
/// initializing a parameter for which there is no declaration.
InitializedEntity(EntityKind Kind, SourceLocation Loc, QualType Type,
bool NRVO = false)
: Kind(Kind), Type(Type) {
new (&LocAndNRVO) LN;
LocAndNRVO.Location = Loc;
LocAndNRVO.NRVO = NRVO;
}
/// Create the initialization entity for a member subobject.
InitializedEntity(FieldDecl *Member, const InitializedEntity *Parent,
- bool Implicit, bool DefaultMemberInit)
- : Kind(EK_Member), Parent(Parent), Type(Member->getType()),
+ bool Implicit, bool DefaultMemberInit,
+ bool IsParenAggInit = false)
+ : Kind(IsParenAggInit ? EK_ParenAggInitMember : EK_Member),
+ Parent(Parent), Type(Member->getType()),
Variable{Member, Implicit, DefaultMemberInit} {}
/// Create the initialization entity for an array element.
InitializedEntity(ASTContext &Context, unsigned Index,
const InitializedEntity &Parent);
/// Create the initialization entity for a lambda capture.
InitializedEntity(IdentifierInfo *VarID, QualType FieldType, SourceLocation Loc)
: Kind(EK_LambdaCapture), Type(FieldType) {
new (&Capture) C;
Capture.VarID = VarID;
Capture.Location = Loc;
}
public:
/// Create the initialization entity for a variable.
static InitializedEntity InitializeVariable(VarDecl *Var) {
return InitializedEntity(Var);
}
/// Create the initialization entity for a parameter.
static InitializedEntity InitializeParameter(ASTContext &Context,
ParmVarDecl *Parm) {
return InitializeParameter(Context, Parm, Parm->getType());
}
/// Create the initialization entity for a parameter, but use
/// another type.
static InitializedEntity
InitializeParameter(ASTContext &Context, ParmVarDecl *Parm, QualType Type) {
bool Consumed = (Context.getLangOpts().ObjCAutoRefCount &&
Parm->hasAttr<NSConsumedAttr>());
InitializedEntity Entity;
Entity.Kind = EK_Parameter;
Entity.Type =
Context.getVariableArrayDecayedType(Type.getUnqualifiedType());
Entity.Parent = nullptr;
Entity.Parameter = {Parm, Consumed};
return Entity;
}
/// Create the initialization entity for a parameter that is
/// only known by its type.
static InitializedEntity InitializeParameter(ASTContext &Context,
QualType Type,
bool Consumed) {
InitializedEntity Entity;
Entity.Kind = EK_Parameter;
Entity.Type = Context.getVariableArrayDecayedType(Type);
Entity.Parent = nullptr;
Entity.Parameter = {nullptr, Consumed};
return Entity;
}
/// Create the initialization entity for a template parameter.
static InitializedEntity
InitializeTemplateParameter(QualType T, NonTypeTemplateParmDecl *Param) {
InitializedEntity Entity;
Entity.Kind = EK_TemplateParameter;
Entity.Type = T;
Entity.Parent = nullptr;
Entity.Variable = {Param, false, false};
return Entity;
}
/// Create the initialization entity for the result of a function.
static InitializedEntity InitializeResult(SourceLocation ReturnLoc,
QualType Type) {
return InitializedEntity(EK_Result, ReturnLoc, Type);
}
static InitializedEntity InitializeStmtExprResult(SourceLocation ReturnLoc,
QualType Type) {
return InitializedEntity(EK_StmtExprResult, ReturnLoc, Type);
}
static InitializedEntity InitializeBlock(SourceLocation BlockVarLoc,
QualType Type) {
return InitializedEntity(EK_BlockElement, BlockVarLoc, Type);
}
static InitializedEntity InitializeLambdaToBlock(SourceLocation BlockVarLoc,
QualType Type) {
return InitializedEntity(EK_LambdaToBlockConversionBlockElement,
BlockVarLoc, Type);
}
/// Create the initialization entity for an exception object.
static InitializedEntity InitializeException(SourceLocation ThrowLoc,
QualType Type) {
return InitializedEntity(EK_Exception, ThrowLoc, Type);
}
/// Create the initialization entity for an object allocated via new.
static InitializedEntity InitializeNew(SourceLocation NewLoc, QualType Type) {
return InitializedEntity(EK_New, NewLoc, Type);
}
/// Create the initialization entity for a temporary.
static InitializedEntity InitializeTemporary(QualType Type) {
return InitializeTemporary(nullptr, Type);
}
/// Create the initialization entity for a temporary.
static InitializedEntity InitializeTemporary(ASTContext &Context,
TypeSourceInfo *TypeInfo) {
QualType Type = TypeInfo->getType();
if (Context.getLangOpts().OpenCLCPlusPlus) {
assert(!Type.hasAddressSpace() && "Temporary already has address space!");
Type = Context.getAddrSpaceQualType(Type, LangAS::opencl_private);
}
return InitializeTemporary(TypeInfo, Type);
}
/// Create the initialization entity for a temporary.
static InitializedEntity InitializeTemporary(TypeSourceInfo *TypeInfo,
QualType Type) {
InitializedEntity Result(EK_Temporary, SourceLocation(), Type);
Result.TypeInfo = TypeInfo;
return Result;
}
/// Create the initialization entity for a related result.
static InitializedEntity InitializeRelatedResult(ObjCMethodDecl *MD,
QualType Type) {
InitializedEntity Result(EK_RelatedResult, SourceLocation(), Type);
Result.MethodDecl = MD;
return Result;
}
/// Create the initialization entity for a base class subobject.
static InitializedEntity
InitializeBase(ASTContext &Context, const CXXBaseSpecifier *Base,
bool IsInheritedVirtualBase,
const InitializedEntity *Parent = nullptr);
/// Create the initialization entity for a delegated constructor.
static InitializedEntity InitializeDelegation(QualType Type) {
return InitializedEntity(EK_Delegating, SourceLocation(), Type);
}
/// Create the initialization entity for a member subobject.
static InitializedEntity
InitializeMember(FieldDecl *Member,
const InitializedEntity *Parent = nullptr,
bool Implicit = false) {
return InitializedEntity(Member, Parent, Implicit, false);
}
/// Create the initialization entity for a member subobject.
static InitializedEntity
InitializeMember(IndirectFieldDecl *Member,
const InitializedEntity *Parent = nullptr,
bool Implicit = false) {
return InitializedEntity(Member->getAnonField(), Parent, Implicit, false);
}
+ /// Create the initialization entity for a member subobject initialized via
+ /// parenthesized aggregate init.
+ static InitializedEntity InitializeMemberFromParenAggInit(FieldDecl *Member) {
+ return InitializedEntity(Member, /*Parent=*/nullptr, /*Implicit=*/false,
+ /*DefaultMemberInit=*/false,
+ /*IsParenAggInit=*/true);
+ }
+
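// Illustrative example (sketch, not part of this patch): this entity kind
// backs C++20 parenthesized aggregate initialization, e.g.
//   struct Point { int x, y; };
//   Point p(1, 2);   // x and y are member subobjects initialized this way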
/// Create the initialization entity for a default member initializer.
static InitializedEntity
InitializeMemberFromDefaultMemberInitializer(FieldDecl *Member) {
return InitializedEntity(Member, nullptr, false, true);
}
/// Create the initialization entity for an array element.
static InitializedEntity InitializeElement(ASTContext &Context,
unsigned Index,
const InitializedEntity &Parent) {
return InitializedEntity(Context, Index, Parent);
}
/// Create the initialization entity for a structured binding.
static InitializedEntity InitializeBinding(VarDecl *Binding) {
return InitializedEntity(Binding, EK_Binding);
}
/// Create the initialization entity for a lambda capture.
///
/// \p VarID The name of the entity being captured, or nullptr for 'this'.
static InitializedEntity InitializeLambdaCapture(IdentifierInfo *VarID,
QualType FieldType,
SourceLocation Loc) {
return InitializedEntity(VarID, FieldType, Loc);
}
/// Create the entity for a compound literal initializer.
static InitializedEntity InitializeCompoundLiteralInit(TypeSourceInfo *TSI) {
InitializedEntity Result(EK_CompoundLiteralInit, SourceLocation(),
TSI->getType());
Result.TypeInfo = TSI;
return Result;
}
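// Illustrative sketch (not part of this header): Sema constructs entities
// through these factories; `Ctx`, `TSI`, and `Field` below are hypothetical
// names for an ASTContext, a TypeSourceInfo, and a FieldDecl.
//
//   InitializedEntity Tmp =
//       InitializedEntity::InitializeTemporary(Ctx, TSI);
//   InitializedEntity Mem =
//       InitializedEntity::InitializeMember(Field, /*Parent=*/&Tmp);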
/// Determine the kind of initialization.
EntityKind getKind() const { return Kind; }
/// Retrieve the parent of the entity being initialized, when
/// the initialization itself is occurring within the context of a
/// larger initialization.
const InitializedEntity *getParent() const { return Parent; }
/// Retrieve the type being initialized.
QualType getType() const { return Type; }
/// Retrieve complete type-source information for the object being
/// constructed, if known.
TypeSourceInfo *getTypeSourceInfo() const {
if (Kind == EK_Temporary || Kind == EK_CompoundLiteralInit)
return TypeInfo;
return nullptr;
}
/// Retrieve the name of the entity being initialized.
DeclarationName getName() const;
/// Retrieve the variable, parameter, or field being
/// initialized.
ValueDecl *getDecl() const;
/// Retrieve the Objective-C method being initialized.
ObjCMethodDecl *getMethodDecl() const { return MethodDecl; }
/// Determine whether this initialization allows the named return
/// value optimization, which also applies to thrown objects.
bool allowsNRVO() const;
bool isParameterKind() const {
return (getKind() == EK_Parameter ||
getKind() == EK_Parameter_CF_Audited);
}
bool isParamOrTemplateParamKind() const {
return isParameterKind() || getKind() == EK_TemplateParameter;
}
/// Determine whether this initialization consumes the
/// parameter.
bool isParameterConsumed() const {
assert(isParameterKind() && "Not a parameter");
return Parameter.getInt();
}
/// Retrieve the base specifier.
const CXXBaseSpecifier *getBaseSpecifier() const {
assert(getKind() == EK_Base && "Not a base specifier");
return Base.getPointer();
}
/// Return whether the base is an inherited virtual base.
bool isInheritedVirtualBase() const {
assert(getKind() == EK_Base && "Not a base specifier");
return Base.getInt();
}
/// Determine whether this is an array new with an unknown bound.
bool isVariableLengthArrayNew() const {
return getKind() == EK_New && isa_and_nonnull<IncompleteArrayType>(
getType()->getAsArrayTypeUnsafe());
}
/// Is this the implicit initialization of a member of a class from
/// a defaulted constructor?
bool isImplicitMemberInitializer() const {
return getKind() == EK_Member && Variable.IsImplicitFieldInit;
}
/// Is this the default member initializer of a member (specified inside
/// the class definition)?
bool isDefaultMemberInitializer() const {
return getKind() == EK_Member && Variable.IsDefaultMemberInit;
}
/// Determine the location of the 'return' keyword when initializing
/// the result of a function call.
SourceLocation getReturnLoc() const {
assert(getKind() == EK_Result && "No 'return' location!");
return LocAndNRVO.Location;
}
/// Determine the location of the 'throw' keyword when initializing
/// an exception object.
SourceLocation getThrowLoc() const {
assert(getKind() == EK_Exception && "No 'throw' location!");
return LocAndNRVO.Location;
}
/// If this is an array, vector, or complex number element, get the
/// element's index.
unsigned getElementIndex() const {
assert(getKind() == EK_ArrayElement || getKind() == EK_VectorElement ||
getKind() == EK_ComplexElement);
return Index;
}
/// If this is already the initializer for an array or vector
/// element, sets the element index.
void setElementIndex(unsigned Index) {
assert(getKind() == EK_ArrayElement || getKind() == EK_VectorElement ||
getKind() == EK_ComplexElement);
this->Index = Index;
}
/// For a lambda capture, return the capture's name.
StringRef getCapturedVarName() const {
assert(getKind() == EK_LambdaCapture && "Not a lambda capture!");
return Capture.VarID ? Capture.VarID->getName() : "this";
}
/// Determine the location of the capture when initializing
/// field from a captured variable in a lambda.
SourceLocation getCaptureLoc() const {
assert(getKind() == EK_LambdaCapture && "Not a lambda capture!");
return Capture.Location;
}
void setParameterCFAudited() {
Kind = EK_Parameter_CF_Audited;
}
unsigned allocateManglingNumber() const { return ++ManglingNumber; }
/// Dump a representation of the initialized entity to standard error,
/// for debugging purposes.
void dump() const;
private:
unsigned dumpImpl(raw_ostream &OS) const;
};
/// Describes the kind of initialization being performed, along with
/// location information for tokens related to the initialization (equal sign,
/// parentheses).
class InitializationKind {
public:
/// The kind of initialization being performed.
enum InitKind {
/// Direct initialization
IK_Direct,
/// Direct list-initialization
IK_DirectList,
/// Copy initialization
IK_Copy,
/// Default initialization
IK_Default,
/// Value initialization
IK_Value
};
private:
/// The context of the initialization.
enum InitContext {
/// Normal context
IC_Normal,
/// Normal context, but allows explicit conversion functions
IC_ExplicitConvs,
/// Implicit context (value initialization)
IC_Implicit,
/// Static cast context
IC_StaticCast,
/// C-style cast context
IC_CStyleCast,
/// Functional cast context
IC_FunctionalCast
};
/// The kind of initialization being performed.
InitKind Kind : 8;
/// The context of the initialization.
InitContext Context : 8;
/// The source locations involved in the initialization.
SourceLocation Locations[3];
InitializationKind(InitKind Kind, InitContext Context, SourceLocation Loc1,
SourceLocation Loc2, SourceLocation Loc3)
: Kind(Kind), Context(Context) {
Locations[0] = Loc1;
Locations[1] = Loc2;
Locations[2] = Loc3;
}
public:
/// Create a direct initialization.
static InitializationKind CreateDirect(SourceLocation InitLoc,
SourceLocation LParenLoc,
SourceLocation RParenLoc) {
return InitializationKind(IK_Direct, IC_Normal,
InitLoc, LParenLoc, RParenLoc);
}
static InitializationKind CreateDirectList(SourceLocation InitLoc) {
return InitializationKind(IK_DirectList, IC_Normal, InitLoc, InitLoc,
InitLoc);
}
static InitializationKind CreateDirectList(SourceLocation InitLoc,
SourceLocation LBraceLoc,
SourceLocation RBraceLoc) {
return InitializationKind(IK_DirectList, IC_Normal, InitLoc, LBraceLoc,
RBraceLoc);
}
/// Create a direct initialization due to a cast that isn't a C-style
/// or functional cast.
static InitializationKind CreateCast(SourceRange TypeRange) {
return InitializationKind(IK_Direct, IC_StaticCast, TypeRange.getBegin(),
TypeRange.getBegin(), TypeRange.getEnd());
}
/// Create a direct initialization for a C-style cast.
static InitializationKind CreateCStyleCast(SourceLocation StartLoc,
SourceRange TypeRange,
bool InitList) {
// C++ cast syntax doesn't permit init lists, but C compound literals are
// exactly that.
return InitializationKind(InitList ? IK_DirectList : IK_Direct,
IC_CStyleCast, StartLoc, TypeRange.getBegin(),
TypeRange.getEnd());
}
/// Create a direct initialization for a functional cast.
static InitializationKind CreateFunctionalCast(SourceRange TypeRange,
bool InitList) {
return InitializationKind(InitList ? IK_DirectList : IK_Direct,
IC_FunctionalCast, TypeRange.getBegin(),
TypeRange.getBegin(), TypeRange.getEnd());
}
/// Create a copy initialization.
static InitializationKind CreateCopy(SourceLocation InitLoc,
SourceLocation EqualLoc,
bool AllowExplicitConvs = false) {
return InitializationKind(IK_Copy,
AllowExplicitConvs? IC_ExplicitConvs : IC_Normal,
InitLoc, EqualLoc, EqualLoc);
}
/// Create a default initialization.
static InitializationKind CreateDefault(SourceLocation InitLoc) {
return InitializationKind(IK_Default, IC_Normal, InitLoc, InitLoc, InitLoc);
}
/// Create a value initialization.
static InitializationKind CreateValue(SourceLocation InitLoc,
SourceLocation LParenLoc,
SourceLocation RParenLoc,
bool isImplicit = false) {
return InitializationKind(IK_Value, isImplicit ? IC_Implicit : IC_Normal,
InitLoc, LParenLoc, RParenLoc);
}
/// Create an initialization from an initializer (which, for direct
/// initialization from a parenthesized list, will be a ParenListExpr).
static InitializationKind CreateForInit(SourceLocation Loc, bool DirectInit,
Expr *Init) {
if (!Init) return CreateDefault(Loc);
if (!DirectInit)
return CreateCopy(Loc, Init->getBeginLoc());
if (isa<InitListExpr>(Init))
return CreateDirectList(Loc, Init->getBeginLoc(), Init->getEndLoc());
return CreateDirect(Loc, Init->getBeginLoc(), Init->getEndLoc());
}
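// Illustrative mapping (sketch, not part of this header): given the written
// initializer of a declaration, CreateForInit selects the kind roughly as
// follows.
//
//   int a;       -> CreateDefault
//   int b = 1;   -> CreateCopy
//   int c(1);    -> CreateDirect
//   int d{1};    -> CreateDirectList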
/// Determine the initialization kind.
InitKind getKind() const {
return Kind;
}
/// Determine whether this initialization is an explicit cast.
bool isExplicitCast() const {
return Context >= IC_StaticCast;
}
/// Determine whether this initialization is a static cast.
bool isStaticCast() const { return Context == IC_StaticCast; }
/// Determine whether this initialization is a C-style cast.
bool isCStyleOrFunctionalCast() const {
return Context >= IC_CStyleCast;
}
/// Determine whether this is a C-style cast.
bool isCStyleCast() const {
return Context == IC_CStyleCast;
}
/// Determine whether this is a functional-style cast.
bool isFunctionalCast() const {
return Context == IC_FunctionalCast;
}
/// Determine whether this initialization is an implicit
/// value-initialization, e.g., as occurs during aggregate
/// initialization.
bool isImplicitValueInit() const { return Context == IC_Implicit; }
/// Retrieve the location at which initialization is occurring.
SourceLocation getLocation() const { return Locations[0]; }
/// Retrieve the source range that covers the initialization.
SourceRange getRange() const {
return SourceRange(Locations[0], Locations[2]);
}
/// Retrieve the location of the equal sign for copy initialization
/// (if present).
SourceLocation getEqualLoc() const {
assert(Kind == IK_Copy && "Only copy initialization has an '='");
return Locations[1];
}
bool isCopyInit() const { return Kind == IK_Copy; }
/// Retrieve whether this initialization allows the use of explicit
/// constructors.
bool AllowExplicit() const { return !isCopyInit(); }
/// Retrieve whether this initialization allows the use of explicit
/// conversion functions when binding a reference. If the reference is the
/// first parameter in a copy or move constructor, such conversions are
/// permitted even though we are performing copy-initialization.
bool allowExplicitConversionFunctionsInRefBinding() const {
return !isCopyInit() || Context == IC_ExplicitConvs;
}
/// Determine whether this initialization has a source range containing the
/// locations of open and closing parentheses or braces.
bool hasParenOrBraceRange() const {
return Kind == IK_Direct || Kind == IK_Value || Kind == IK_DirectList;
}
/// Retrieve the source range containing the locations of the open
/// and closing parentheses or braces for value, direct, and direct list
/// initializations.
SourceRange getParenOrBraceRange() const {
assert(hasParenOrBraceRange() && "Only direct, value, and direct-list "
"initialization have parentheses or "
"braces");
return SourceRange(Locations[1], Locations[2]);
}
};
/// Describes the sequence of initializations required to initialize
/// a given object or reference with a set of arguments.
class InitializationSequence {
public:
/// Describes the kind of initialization sequence computed.
enum SequenceKind {
/// A failed initialization sequence. The failure kind tells what
/// happened.
FailedSequence = 0,
/// A dependent initialization, which could not be
/// type-checked due to the presence of dependent types or
/// dependently-typed expressions.
DependentSequence,
/// A normal sequence.
NormalSequence
};
/// Describes the kind of a particular step in an initialization
/// sequence.
enum StepKind {
/// Resolve the address of an overloaded function to a specific
/// function declaration.
SK_ResolveAddressOfOverloadedFunction,
/// Perform a derived-to-base cast, producing an rvalue.
SK_CastDerivedToBasePRValue,
/// Perform a derived-to-base cast, producing an xvalue.
SK_CastDerivedToBaseXValue,
/// Perform a derived-to-base cast, producing an lvalue.
SK_CastDerivedToBaseLValue,
/// Reference binding to an lvalue.
SK_BindReference,
/// Reference binding to a temporary.
SK_BindReferenceToTemporary,
/// An optional copy of a temporary object to another
/// temporary object, which is permitted (but not required) by
/// C++98/03 but not C++0x.
SK_ExtraneousCopyToTemporary,
/// Direct-initialization from a reference-related object in the
/// final stage of class copy-initialization.
SK_FinalCopy,
/// Perform a user-defined conversion, either via a conversion
/// function or via a constructor.
SK_UserConversion,
/// Perform a qualification conversion, producing a prvalue.
SK_QualificationConversionPRValue,
/// Perform a qualification conversion, producing an xvalue.
SK_QualificationConversionXValue,
/// Perform a qualification conversion, producing an lvalue.
SK_QualificationConversionLValue,
/// Perform a function reference conversion, see [dcl.init.ref]p4.
SK_FunctionReferenceConversion,
/// Perform a conversion adding _Atomic to a type.
SK_AtomicConversion,
/// Perform an implicit conversion sequence.
SK_ConversionSequence,
/// Perform an implicit conversion sequence without narrowing.
SK_ConversionSequenceNoNarrowing,
/// Perform list-initialization without a constructor.
SK_ListInitialization,
/// Unwrap the single-element initializer list for a reference.
SK_UnwrapInitList,
/// Rewrap the single-element initializer list for a reference.
SK_RewrapInitList,
/// Perform initialization via a constructor.
SK_ConstructorInitialization,
/// Perform initialization via a constructor, taking arguments from
/// a single InitListExpr.
SK_ConstructorInitializationFromList,
/// Zero-initialize the object
SK_ZeroInitialization,
/// C assignment
SK_CAssignment,
/// Initialization by string
SK_StringInit,
/// An initialization that "converts" an Objective-C object
/// (not a pointer to an object) to another Objective-C object type.
SK_ObjCObjectConversion,
/// Array indexing for initialization by elementwise copy.
SK_ArrayLoopIndex,
/// Array initialization by elementwise copy.
SK_ArrayLoopInit,
/// Array initialization (from an array rvalue).
SK_ArrayInit,
/// Array initialization (from an array rvalue) as a GNU extension.
SK_GNUArrayInit,
/// Array initialization from a parenthesized initializer list.
/// This is a GNU C++ extension.
SK_ParenthesizedArrayInit,
/// Pass an object by indirect copy-and-restore.
SK_PassByIndirectCopyRestore,
/// Pass an object by indirect restore.
SK_PassByIndirectRestore,
/// Produce an Objective-C object pointer.
SK_ProduceObjCObject,
/// Construct a std::initializer_list from an initializer list.
SK_StdInitializerList,
/// Perform initialization via a constructor taking a single
/// std::initializer_list argument.
SK_StdInitializerListConstructorCall,
/// Initialize an OpenCL sampler from an integer.
SK_OCLSamplerInit,
/// Initialize an opaque OpenCL type (event_t, queue_t, etc.) with zero
SK_OCLZeroOpaqueType,
/// Initialize an aggregate with a parenthesized list of values.
/// This is a C++20 feature.
SK_ParenthesizedListInit
};
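// Illustrative sketch (not part of this header): a computed sequence is an
// ordered list of these steps.  Binding `const Base &r = Derived();`, for
// example, might yield something like
//   SK_CastDerivedToBasePRValue -> SK_BindReferenceToTemporary
// the exact steps are computed by InitializationSequence below.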
/// A single step in the initialization sequence.
class Step {
public:
/// The kind of conversion or initialization step we are taking.
StepKind Kind;
// The type that results from this initialization.
QualType Type;
struct F {
bool HadMultipleCandidates;
FunctionDecl *Function;
DeclAccessPair FoundDecl;
};
union {
/// When Kind == SK_ResolveAddressOfOverloadedFunction or Kind ==
/// SK_UserConversion, the function that the expression should be
/// resolved to or the conversion function to call, respectively.
/// When Kind == SK_ConstructorInitialization or
/// SK_ConstructorInitializationFromList, the constructor to be called.
///
/// Always a FunctionDecl, plus a Boolean flag telling if it was
/// selected from an overloaded set having size greater than 1.
/// For conversion decls, the naming class is the source type.
/// For construct decls, the naming class is the target type.
struct F Function;
/// When Kind = SK_ConversionSequence, the implicit conversion
/// sequence.
ImplicitConversionSequence *ICS;
/// When Kind = SK_RewrapInitList, the syntactic form of the
/// wrapping list.
InitListExpr *WrappingSyntacticList;
};
void Destroy();
};
private:
/// The kind of initialization sequence computed.
enum SequenceKind SequenceKind;
/// Steps taken by this initialization.
SmallVector<Step, 4> Steps;
public:
/// Describes why initialization failed.
enum FailureKind {
/// Too many initializers provided for a reference.
FK_TooManyInitsForReference,
/// Reference initialized from a parenthesized initializer list.
FK_ParenthesizedListInitForReference,
/// Array must be initialized with an initializer list.
FK_ArrayNeedsInitList,
/// Array must be initialized with an initializer list or a
/// string literal.
FK_ArrayNeedsInitListOrStringLiteral,
/// Array must be initialized with an initializer list or a
/// wide string literal.
FK_ArrayNeedsInitListOrWideStringLiteral,
/// Initializing a wide char array with narrow string literal.
FK_NarrowStringIntoWideCharArray,
/// Initializing char array with wide string literal.
FK_WideStringIntoCharArray,
/// Initializing wide char array with incompatible wide string
/// literal.
FK_IncompatWideStringIntoWideChar,
/// Initializing char8_t array with plain string literal.
FK_PlainStringIntoUTF8Char,
/// Initializing char array with UTF-8 string literal.
FK_UTF8StringIntoPlainChar,
/// Array type mismatch.
FK_ArrayTypeMismatch,
/// Non-constant array initializer
FK_NonConstantArrayInit,
/// Cannot resolve the address of an overloaded function.
FK_AddressOfOverloadFailed,
/// Overloading due to reference initialization failed.
FK_ReferenceInitOverloadFailed,
/// Non-const lvalue reference binding to a temporary.
FK_NonConstLValueReferenceBindingToTemporary,
/// Non-const lvalue reference binding to a bit-field.
FK_NonConstLValueReferenceBindingToBitfield,
/// Non-const lvalue reference binding to a vector element.
FK_NonConstLValueReferenceBindingToVectorElement,
/// Non-const lvalue reference binding to a matrix element.
FK_NonConstLValueReferenceBindingToMatrixElement,
/// Non-const lvalue reference binding to an lvalue of unrelated
/// type.
FK_NonConstLValueReferenceBindingToUnrelated,
/// Rvalue reference binding to an lvalue.
FK_RValueReferenceBindingToLValue,
/// Reference binding drops qualifiers.
FK_ReferenceInitDropsQualifiers,
/// Reference with mismatching address space binding to temporary.
FK_ReferenceAddrspaceMismatchTemporary,
/// Reference binding failed.
FK_ReferenceInitFailed,
/// Implicit conversion failed.
FK_ConversionFailed,
/// Implicit conversion from a property reference failed.
FK_ConversionFromPropertyFailed,
/// Too many initializers for scalar
FK_TooManyInitsForScalar,
/// Scalar initialized from a parenthesized initializer list.
FK_ParenthesizedListInitForScalar,
/// Reference initialization from an initializer list
FK_ReferenceBindingToInitList,
/// Initialization of some unused destination type with an
/// initializer list.
FK_InitListBadDestinationType,
/// Overloading for a user-defined conversion failed.
FK_UserConversionOverloadFailed,
/// Overloading for initialization by constructor failed.
FK_ConstructorOverloadFailed,
/// Overloading for list-initialization by constructor failed.
FK_ListConstructorOverloadFailed,
/// Default-initialization of a 'const' object.
FK_DefaultInitOfConst,
/// Initialization of an incomplete type.
FK_Incomplete,
/// Variable-length array must not have an initializer.
FK_VariableLengthArrayHasInitializer,
/// List initialization failed at some point.
FK_ListInitializationFailed,
/// Initializer has a placeholder type which cannot be
/// resolved by initialization.
FK_PlaceholderType,
/// Trying to take the address of a function that doesn't support
/// having its address taken.
FK_AddressOfUnaddressableFunction,
/// List-copy-initialization chose an explicit constructor.
FK_ExplicitConstructor,
/// Parenthesized list initialization failed at some point.
/// This is a C++20 feature.
FK_ParenthesizedListInitFailed,
};
private:
/// The reason why initialization failed.
FailureKind Failure;
/// The failed result of overload resolution.
OverloadingResult FailedOverloadResult;
/// The candidate set created when initialization failed.
OverloadCandidateSet FailedCandidateSet;
/// The incomplete type that caused a failure.
QualType FailedIncompleteType;
/// The fixit that needs to be applied to make this initialization
/// succeed.
std::string ZeroInitializationFixit;
SourceLocation ZeroInitializationFixitLoc;
public:
/// Called for initializations that are invalid but would be valid
/// zero-initializations if the Fixit were applied.
void SetZeroInitializationFixit(const std::string& Fixit, SourceLocation L) {
ZeroInitializationFixit = Fixit;
ZeroInitializationFixitLoc = L;
}
private:
/// Prints a follow-up note that highlights the location of
/// the initialized entity, if it's remote.
void PrintInitLocationNote(Sema &S, const InitializedEntity &Entity);
public:
/// Try to perform initialization of the given entity, creating a
/// record of the steps required to perform the initialization.
///
/// The generated initialization sequence will either contain enough
/// information to diagnose a failed initialization or describe the steps
/// required to perform a successful one.
///
/// \param S the semantic analysis object.
///
/// \param Entity the entity being initialized.
///
/// \param Kind the kind of initialization being performed.
///
/// \param Args the argument(s) provided for initialization.
///
/// \param TopLevelOfInitList true if we are initializing from an expression
/// at the top level inside an initializer list. This disallows
/// narrowing conversions in C++11 onwards.
/// \param TreatUnavailableAsInvalid true if we want to treat unavailable
/// as invalid.
InitializationSequence(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
MultiExprArg Args,
bool TopLevelOfInitList = false,
bool TreatUnavailableAsInvalid = true);
void InitializeFrom(Sema &S, const InitializedEntity &Entity,
const InitializationKind &Kind, MultiExprArg Args,
bool TopLevelOfInitList, bool TreatUnavailableAsInvalid);
~InitializationSequence();
/// Perform the actual initialization of the given entity based on
/// the computed initialization sequence.
///
/// \param S the semantic analysis object.
///
/// \param Entity the entity being initialized.
///
/// \param Kind the kind of initialization being performed.
///
/// \param Args the argument(s) provided for initialization, ownership of
/// which is transferred into the routine.
///
/// \param ResultType if non-NULL, will be set to the type of the
/// initialized object, which is the type of the declaration in most
/// cases. However, when the initialized object is a variable of
/// incomplete array type and the initializer is an initializer
/// list, this type will be set to the completed array type.
///
/// \returns an expression that performs the actual object initialization, if
/// the initialization is well-formed. Otherwise, emits diagnostics
/// and returns an invalid expression.
ExprResult Perform(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
MultiExprArg Args,
QualType *ResultType = nullptr);
/// Diagnose a potentially-invalid initialization sequence.
///
/// \returns true if the initialization sequence was ill-formed,
/// false otherwise.
bool Diagnose(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
ArrayRef<Expr *> Args);
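// Illustrative caller pattern (sketch; `S`, `Entity`, `Kind`, and `Args` are
// hypothetical locals):
//
//   InitializationSequence Seq(S, Entity, Kind, Args);
//   if (Seq.Failed()) {
//     Seq.Diagnose(S, Entity, Kind, Args);
//     return ExprError();
//   }
//   ExprResult Init = Seq.Perform(S, Entity, Kind, Args);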
/// Determine the kind of initialization sequence computed.
enum SequenceKind getKind() const { return SequenceKind; }
/// Set the kind of sequence computed.
void setSequenceKind(enum SequenceKind SK) { SequenceKind = SK; }
/// Determine whether the initialization sequence is valid.
explicit operator bool() const { return !Failed(); }
/// Determine whether the initialization sequence is invalid.
bool Failed() const { return SequenceKind == FailedSequence; }
using step_iterator = SmallVectorImpl<Step>::const_iterator;
step_iterator step_begin() const { return Steps.begin(); }
step_iterator step_end() const { return Steps.end(); }
using step_range = llvm::iterator_range<step_iterator>;
step_range steps() const { return {step_begin(), step_end()}; }
/// Determine whether this initialization is a direct reference
/// binding (C++ [dcl.init.ref]).
bool isDirectReferenceBinding() const;
/// Determine whether this initialization failed due to an ambiguity.
bool isAmbiguous() const;
/// Determine whether this initialization is direct call to a
/// constructor.
bool isConstructorInitialization() const;
/// Add a new step in the initialization that resolves the address
/// of an overloaded function to a specific function declaration.
///
/// \param Function the function to which the overloaded function reference
/// resolves.
void AddAddressOverloadResolutionStep(FunctionDecl *Function,
DeclAccessPair Found,
bool HadMultipleCandidates);
/// Add a new step in the initialization that performs a derived-to-
/// base cast.
///
/// \param BaseType the base type to which we will be casting.
///
/// \param Category Indicates whether the result will be treated as an
/// rvalue, an xvalue, or an lvalue.
void AddDerivedToBaseCastStep(QualType BaseType,
ExprValueKind Category);
/// Add a new step binding a reference to an object.
///
/// \param BindingTemporary True if we are binding a reference to a temporary
/// object (thereby extending its lifetime); false if we are binding to an
/// lvalue or an lvalue treated as an rvalue.
void AddReferenceBindingStep(QualType T, bool BindingTemporary);
/// Add a new step that makes an extraneous copy of the input
/// to a temporary of the same class type.
///
/// This extraneous copy only occurs during reference binding in
/// C++98/03, where we are permitted (but not required) to introduce
/// an extra copy. At a bare minimum, we must check that we could
/// call the copy constructor, and produce a diagnostic if the copy
/// constructor is inaccessible or no copy constructor matches.
//
/// \param T The type of the temporary being created.
void AddExtraneousCopyToTemporary(QualType T);
/// Add a new step that makes a copy of the input to an object of
/// the given type, as the final step in class copy-initialization.
void AddFinalCopy(QualType T);
/// Add a new step invoking a conversion function, which is either
/// a constructor or a conversion function.
void AddUserConversionStep(FunctionDecl *Function,
DeclAccessPair FoundDecl,
QualType T,
bool HadMultipleCandidates);
/// Add a new step that performs a qualification conversion to the
/// given type.
void AddQualificationConversionStep(QualType Ty,
ExprValueKind Category);
/// Add a new step that performs a function reference conversion to the
/// given type.
void AddFunctionReferenceConversionStep(QualType Ty);
/// Add a new step that performs conversion from non-atomic to atomic
/// type.
void AddAtomicConversionStep(QualType Ty);
/// Add a new step that applies an implicit conversion sequence.
void AddConversionSequenceStep(const ImplicitConversionSequence &ICS,
QualType T, bool TopLevelOfInitList = false);
/// Add a list-initialization step.
void AddListInitializationStep(QualType T);
/// Add a constructor-initialization step.
///
/// \param FromInitList The constructor call is syntactically an initializer
/// list.
/// \param AsInitList The constructor is called as an init list constructor.
void AddConstructorInitializationStep(DeclAccessPair FoundDecl,
CXXConstructorDecl *Constructor,
QualType T,
bool HadMultipleCandidates,
bool FromInitList, bool AsInitList);
/// Add a zero-initialization step.
void AddZeroInitializationStep(QualType T);
/// Add a C assignment step.
//
// FIXME: It isn't clear whether this should ever be needed;
// ideally, we would handle everything needed in C in the common
// path. However, that isn't the case yet.
void AddCAssignmentStep(QualType T);
/// Add a string init step.
void AddStringInitStep(QualType T);
/// Add an Objective-C object conversion step, which is
/// always a no-op.
void AddObjCObjectConversionStep(QualType T);
/// Add an array initialization loop step.
void AddArrayInitLoopStep(QualType T, QualType EltTy);
/// Add an array initialization step.
void AddArrayInitStep(QualType T, bool IsGNUExtension);
/// Add a parenthesized array initialization step.
void AddParenthesizedArrayInitStep(QualType T);
/// Add a step to pass an object by indirect copy-restore.
void AddPassByIndirectCopyRestoreStep(QualType T, bool shouldCopy);
/// Add a step to "produce" an Objective-C object (by
/// retaining it).
void AddProduceObjCObjectStep(QualType T);
/// Add a step to construct a std::initializer_list object from an
/// initializer list.
void AddStdInitializerListConstructionStep(QualType T);
/// Add a step to initialize an OpenCL sampler from an integer
/// constant.
void AddOCLSamplerInitStep(QualType T);
/// Add a step to initialize an OpenCL opaque type (event_t, queue_t, etc.)
/// from a zero constant.
void AddOCLZeroOpaqueTypeStep(QualType T);
void AddParenthesizedListInitStep(QualType T);
/// Add steps to unwrap an initializer list for a reference around a
/// single element and rewrap it at the end.
void RewrapReferenceInitList(QualType T, InitListExpr *Syntactic);
/// Note that this initialization sequence failed.
void SetFailed(FailureKind Failure) {
SequenceKind = FailedSequence;
this->Failure = Failure;
assert((Failure != FK_Incomplete || !FailedIncompleteType.isNull()) &&
"Incomplete type failure requires a type!");
}
/// Note that this initialization sequence failed due to failed
/// overload resolution.
void SetOverloadFailure(FailureKind Failure, OverloadingResult Result);
/// Retrieve a reference to the candidate set when overload
/// resolution fails.
OverloadCandidateSet &getFailedCandidateSet() {
return FailedCandidateSet;
}
/// Get the overloading result, for when the initialization
/// sequence failed due to a bad overload.
OverloadingResult getFailedOverloadResult() const {
return FailedOverloadResult;
}
/// Note that this initialization sequence failed due to an
/// incomplete type.
void setIncompleteTypeFailure(QualType IncompleteType) {
FailedIncompleteType = IncompleteType;
SetFailed(FK_Incomplete);
}
/// Determine why initialization failed.
FailureKind getFailureKind() const {
assert(Failed() && "Not an initialization failure!");
return Failure;
}
/// Dump a representation of this initialization sequence to
/// the given stream, for debugging purposes.
void dump(raw_ostream &OS) const;
/// Dump a representation of this initialization sequence to
/// standard error, for debugging purposes.
void dump() const;
};
} // namespace clang
#endif // LLVM_CLANG_SEMA_INITIALIZATION_H
diff --git a/contrib/llvm-project/clang/lib/AST/ASTContext.cpp b/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
index 2884fe660422..8054eb2e12d3 100644
--- a/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
@@ -1,13455 +1,13456 @@
//===- ASTContext.cpp - Context to hold long-lived AST nodes --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ASTContext interface.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "CXXABI.h"
#include "Interp/Context.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTConcept.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/ASTTypeTraits.h"
#include "clang/AST/Attr.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Comment.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclContextInternals.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/DependenceFlags.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprConcepts.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/MangleNumberingContext.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/AddressSpaces.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CommentOptions.h"
#include "clang/Basic/ExceptionSpecificationType.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Linkage.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/NoSanitizeList.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetCXXABI.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/XRayLists.h"
#include "llvm/ADT/APFixedPoint.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
using namespace clang;
enum FloatingRank {
BFloat16Rank,
Float16Rank,
HalfRank,
FloatRank,
DoubleRank,
LongDoubleRank,
Float128Rank,
Ibm128Rank
};
/// \returns location that is relevant when searching for Doc comments related
/// to \p D.
static SourceLocation getDeclLocForCommentSearch(const Decl *D,
SourceManager &SourceMgr) {
assert(D);
// User can not attach documentation to implicit declarations.
if (D->isImplicit())
return {};
// User can not attach documentation to implicit instantiations.
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (VD->isStaticDataMember() &&
VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *CRD = dyn_cast<CXXRecordDecl>(D)) {
if (CRD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
TemplateSpecializationKind TSK = CTSD->getSpecializationKind();
if (TSK == TSK_ImplicitInstantiation ||
TSK == TSK_Undeclared)
return {};
}
if (const auto *ED = dyn_cast<EnumDecl>(D)) {
if (ED->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *TD = dyn_cast<TagDecl>(D)) {
// When a tag declaration (but not its definition!) is part of the
// decl-specifier-seq of some other declaration, it doesn't get a comment.
if (TD->isEmbeddedInDeclarator() && !TD->isCompleteDefinition())
return {};
}
// TODO: handle comments for function parameters properly.
if (isa<ParmVarDecl>(D))
return {};
// TODO: we could look up template parameter documentation in the template
// documentation.
if (isa<TemplateTypeParmDecl>(D) ||
isa<NonTypeTemplateParmDecl>(D) ||
isa<TemplateTemplateParmDecl>(D))
return {};
// Find declaration location.
// For Objective-C declarations we generally don't expect to have multiple
// declarators, thus use declaration starting location as the "declaration
// location".
// For all other declarations multiple declarators are used quite frequently,
// so we use the location of the identifier as the "declaration location".
if (isa<ObjCMethodDecl>(D) || isa<ObjCContainerDecl>(D) ||
isa<ObjCPropertyDecl>(D) ||
isa<RedeclarableTemplateDecl>(D) ||
isa<ClassTemplateSpecializationDecl>(D) ||
// Allow association with Y across {} in `typedef struct X {} Y`.
isa<TypedefDecl>(D))
return D->getBeginLoc();
const SourceLocation DeclLoc = D->getLocation();
if (DeclLoc.isMacroID()) {
if (isa<TypedefDecl>(D)) {
// If the location of the typedef name is in a macro, it is because it was
// declared via a macro. Try using the declaration's starting location as
// the "declaration location".
return D->getBeginLoc();
}
if (const auto *TD = dyn_cast<TagDecl>(D)) {
// If the location of the tag decl is inside a macro, but the spelling of
// the tag name comes from a macro argument, it looks like a special
// macro like NS_ENUM is being used to define the tag decl. In that
// case, adjust the source location to the expansion loc so that we can
// attach the comment to the tag decl.
if (SourceMgr.isMacroArgExpansion(DeclLoc) && TD->isCompleteDefinition())
return SourceMgr.getExpansionLoc(DeclLoc);
}
}
return DeclLoc;
}
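// Illustrative example (sketch): for
//   /// A handle.
//   typedef struct X {} Y;
// the search location is the typedef's starting location, so the doc comment
// is associated with Y even though the identifier Y appears after the braces.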
RawComment *ASTContext::getRawCommentForDeclNoCacheImpl(
const Decl *D, const SourceLocation RepresentativeLocForDecl,
const std::map<unsigned, RawComment *> &CommentsInTheFile) const {
// If the declaration doesn't map directly to a location in a file, we
// can't find the comment.
if (RepresentativeLocForDecl.isInvalid() ||
!RepresentativeLocForDecl.isFileID())
return nullptr;
// If there are no comments anywhere, we won't find anything.
if (CommentsInTheFile.empty())
return nullptr;
// Decompose the location for the declaration and find the beginning of the
// file buffer.
const std::pair<FileID, unsigned> DeclLocDecomp =
SourceMgr.getDecomposedLoc(RepresentativeLocForDecl);
// Slow path.
auto OffsetCommentBehindDecl =
CommentsInTheFile.lower_bound(DeclLocDecomp.second);
// First check whether we have a trailing comment.
if (OffsetCommentBehindDecl != CommentsInTheFile.end()) {
RawComment *CommentBehindDecl = OffsetCommentBehindDecl->second;
if ((CommentBehindDecl->isDocumentation() ||
LangOpts.CommentOpts.ParseAllComments) &&
CommentBehindDecl->isTrailingComment() &&
(isa<FieldDecl>(D) || isa<EnumConstantDecl>(D) || isa<VarDecl>(D) ||
isa<ObjCMethodDecl>(D) || isa<ObjCPropertyDecl>(D))) {
// Check that Doxygen trailing comment comes after the declaration, starts
// on the same line and in the same file as the declaration.
if (SourceMgr.getLineNumber(DeclLocDecomp.first, DeclLocDecomp.second) ==
Comments.getCommentBeginLine(CommentBehindDecl, DeclLocDecomp.first,
OffsetCommentBehindDecl->first)) {
return CommentBehindDecl;
}
}
}
// The comment just after the declaration was not a trailing comment.
// Let's look at the previous comment.
if (OffsetCommentBehindDecl == CommentsInTheFile.begin())
return nullptr;
auto OffsetCommentBeforeDecl = --OffsetCommentBehindDecl;
RawComment *CommentBeforeDecl = OffsetCommentBeforeDecl->second;
// Check that we actually have a non-member Doxygen comment.
if (!(CommentBeforeDecl->isDocumentation() ||
LangOpts.CommentOpts.ParseAllComments) ||
CommentBeforeDecl->isTrailingComment())
return nullptr;
// Decompose the end of the comment.
const unsigned CommentEndOffset =
Comments.getCommentEndOffset(CommentBeforeDecl);
// Get the corresponding buffer.
bool Invalid = false;
const char *Buffer = SourceMgr.getBufferData(DeclLocDecomp.first,
&Invalid).data();
if (Invalid)
return nullptr;
// Extract text between the comment and declaration.
StringRef Text(Buffer + CommentEndOffset,
DeclLocDecomp.second - CommentEndOffset);
// There should be no other declarations or preprocessor directives between
// comment and declaration.
if (Text.find_first_of(";{}#@") != StringRef::npos)
return nullptr;
return CommentBeforeDecl;
}
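// Illustrative example (sketch): both placements below are recognized by the
// logic above; the first as a trailing comment on the same line as the
// declaration, the second as a preceding comment with only whitespace (no
// ';', '{', '}', '#', or '@') in between.
//
//   int Count; ///< Number of elements.
//
//   /// Number of elements.
//   int Count;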
RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const {
const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr);
// If the declaration doesn't map directly to a location in a file, we
// can't find the comment.
if (DeclLoc.isInvalid() || !DeclLoc.isFileID())
return nullptr;
if (ExternalSource && !CommentsLoaded) {
ExternalSource->ReadComments();
CommentsLoaded = true;
}
if (Comments.empty())
return nullptr;
const FileID File = SourceMgr.getDecomposedLoc(DeclLoc).first;
if (!File.isValid()) {
return nullptr;
}
const auto CommentsInThisFile = Comments.getCommentsInFile(File);
if (!CommentsInThisFile || CommentsInThisFile->empty())
return nullptr;
return getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile);
}
void ASTContext::addComment(const RawComment &RC) {
assert(LangOpts.RetainCommentsFromSystemHeaders ||
!SourceMgr.isInSystemHeader(RC.getSourceRange().getBegin()));
Comments.addComment(RC, LangOpts.CommentOpts, BumpAlloc);
}
/// If we have a 'templated' declaration for a template, adjust 'D' to
/// refer to the actual template.
/// If we have an implicit instantiation, adjust 'D' to refer to the template.
static const Decl &adjustDeclToTemplate(const Decl &D) {
if (const auto *FD = dyn_cast<FunctionDecl>(&D)) {
// Is this function declaration part of a function template?
if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
return *FTD;
// Nothing to do if function is not an implicit instantiation.
if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation)
return D;
// Function is an implicit instantiation of a function template?
if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate())
return *FTD;
// Function is instantiated from a member definition of a class template?
if (const FunctionDecl *MemberDecl =
FD->getInstantiatedFromMemberFunction())
return *MemberDecl;
return D;
}
if (const auto *VD = dyn_cast<VarDecl>(&D)) {
// Static data member is instantiated from a member definition of a class
// template?
if (VD->isStaticDataMember())
if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember())
return *MemberDecl;
return D;
}
if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) {
// Is this class declaration part of a class template?
if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate())
return *CTD;
// Class is an implicit instantiation of a class template or partial
// specialization?
if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) {
if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation)
return D;
llvm::PointerUnion<ClassTemplateDecl *,
ClassTemplatePartialSpecializationDecl *>
PU = CTSD->getSpecializedTemplateOrPartial();
return PU.is<ClassTemplateDecl *>()
? *static_cast<const Decl *>(PU.get<ClassTemplateDecl *>())
: *static_cast<const Decl *>(
PU.get<ClassTemplatePartialSpecializationDecl *>());
}
// Class is instantiated from a member definition of a class template?
if (const MemberSpecializationInfo *Info =
CRD->getMemberSpecializationInfo())
return *Info->getInstantiatedFrom();
return D;
}
if (const auto *ED = dyn_cast<EnumDecl>(&D)) {
// Enum is instantiated from a member definition of a class template?
if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum())
return *MemberDecl;
return D;
}
// FIXME: Adjust alias templates?
return D;
}
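// Illustrative example (sketch): given
//   /// A growable array.
//   template <typename T> struct Vec {};
//   Vec<int> V;
// the implicit instantiation Vec<int> has no comment of its own, so
// adjustDeclToTemplate redirects comment lookups to the ClassTemplateDecl
// `Vec`, where the documentation actually lives.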
const RawComment *ASTContext::getRawCommentForAnyRedecl(
const Decl *D,
const Decl **OriginalDecl) const {
if (!D) {
if (OriginalDecl)
*OriginalDecl = nullptr;
return nullptr;
}
D = &adjustDeclToTemplate(*D);
// Any comment directly attached to D?
{
auto DeclComment = DeclRawComments.find(D);
if (DeclComment != DeclRawComments.end()) {
if (OriginalDecl)
*OriginalDecl = D;
return DeclComment->second;
}
}
// Any comment attached to any redeclaration of D?
const Decl *CanonicalD = D->getCanonicalDecl();
if (!CanonicalD)
return nullptr;
{
auto RedeclComment = RedeclChainComments.find(CanonicalD);
if (RedeclComment != RedeclChainComments.end()) {
if (OriginalDecl)
*OriginalDecl = RedeclComment->second;
auto CommentAtRedecl = DeclRawComments.find(RedeclComment->second);
assert(CommentAtRedecl != DeclRawComments.end() &&
"This decl is supposed to have comment attached.");
return CommentAtRedecl->second;
}
}
// Any redeclarations of D that we haven't checked for comments yet?
// We can't use DenseMap::iterator directly since it could be invalidated.
auto LastCheckedRedecl = [this, CanonicalD]() -> const Decl * {
auto LookupRes = CommentlessRedeclChains.find(CanonicalD);
if (LookupRes != CommentlessRedeclChains.end())
return LookupRes->second;
return nullptr;
}();
for (const auto Redecl : D->redecls()) {
assert(Redecl);
// Skip all redeclarations that have been checked previously.
if (LastCheckedRedecl) {
if (LastCheckedRedecl == Redecl) {
LastCheckedRedecl = nullptr;
}
continue;
}
const RawComment *RedeclComment = getRawCommentForDeclNoCache(Redecl);
if (RedeclComment) {
cacheRawCommentForDecl(*Redecl, *RedeclComment);
if (OriginalDecl)
*OriginalDecl = Redecl;
return RedeclComment;
}
CommentlessRedeclChains[CanonicalD] = Redecl;
}
if (OriginalDecl)
*OriginalDecl = nullptr;
return nullptr;
}
void ASTContext::cacheRawCommentForDecl(const Decl &OriginalD,
const RawComment &Comment) const {
assert(Comment.isDocumentation() || LangOpts.CommentOpts.ParseAllComments);
DeclRawComments.try_emplace(&OriginalD, &Comment);
const Decl *const CanonicalDecl = OriginalD.getCanonicalDecl();
RedeclChainComments.try_emplace(CanonicalDecl, &OriginalD);
CommentlessRedeclChains.erase(CanonicalDecl);
}
static void addRedeclaredMethods(const ObjCMethodDecl *ObjCMethod,
SmallVectorImpl<const NamedDecl *> &Redeclared) {
const DeclContext *DC = ObjCMethod->getDeclContext();
if (const auto *IMD = dyn_cast<ObjCImplDecl>(DC)) {
const ObjCInterfaceDecl *ID = IMD->getClassInterface();
if (!ID)
return;
// Add redeclared method here.
for (const auto *Ext : ID->known_extensions()) {
if (ObjCMethodDecl *RedeclaredMethod =
Ext->getMethod(ObjCMethod->getSelector(),
ObjCMethod->isInstanceMethod()))
Redeclared.push_back(RedeclaredMethod);
}
}
}
void ASTContext::attachCommentsToJustParsedDecls(ArrayRef<Decl *> Decls,
const Preprocessor *PP) {
if (Comments.empty() || Decls.empty())
return;
FileID File;
for (Decl *D : Decls) {
SourceLocation Loc = D->getLocation();
if (Loc.isValid()) {
// See if there are any new comments that are not attached to a decl.
// The location doesn't have to be precise - we care only about the file.
File = SourceMgr.getDecomposedLoc(Loc).first;
break;
}
}
if (File.isInvalid())
return;
auto CommentsInThisFile = Comments.getCommentsInFile(File);
if (!CommentsInThisFile || CommentsInThisFile->empty() ||
CommentsInThisFile->rbegin()->second->isAttached())
return;
// There is at least one comment not attached to a decl.
// Maybe it should be attached to one of Decls?
//
// Note that this way we pick up not only comments that precede the
// declaration, but also comments that *follow* the declaration -- thanks to
// the lookahead in the lexer: we've consumed the semicolon and looked
// ahead through comments.
for (const Decl *D : Decls) {
assert(D);
if (D->isInvalidDecl())
continue;
D = &adjustDeclToTemplate(*D);
const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr);
if (DeclLoc.isInvalid() || !DeclLoc.isFileID())
continue;
if (DeclRawComments.count(D) > 0)
continue;
if (RawComment *const DocComment =
getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile)) {
cacheRawCommentForDecl(*D, *DocComment);
comments::FullComment *FC = DocComment->parse(*this, PP, D);
ParsedComments[D->getCanonicalDecl()] = FC;
}
}
}
comments::FullComment *ASTContext::cloneFullComment(comments::FullComment *FC,
const Decl *D) const {
auto *ThisDeclInfo = new (*this) comments::DeclInfo;
ThisDeclInfo->CommentDecl = D;
ThisDeclInfo->IsFilled = false;
ThisDeclInfo->fill();
ThisDeclInfo->CommentDecl = FC->getDecl();
if (!ThisDeclInfo->TemplateParameters)
ThisDeclInfo->TemplateParameters = FC->getDeclInfo()->TemplateParameters;
comments::FullComment *CFC =
new (*this) comments::FullComment(FC->getBlocks(),
ThisDeclInfo);
return CFC;
}
comments::FullComment *ASTContext::getLocalCommentForDeclUncached(const Decl *D) const {
const RawComment *RC = getRawCommentForDeclNoCache(D);
return RC ? RC->parse(*this, nullptr, D) : nullptr;
}
comments::FullComment *ASTContext::getCommentForDecl(
const Decl *D,
const Preprocessor *PP) const {
if (!D || D->isInvalidDecl())
return nullptr;
D = &adjustDeclToTemplate(*D);
const Decl *Canonical = D->getCanonicalDecl();
llvm::DenseMap<const Decl *, comments::FullComment *>::iterator Pos =
ParsedComments.find(Canonical);
if (Pos != ParsedComments.end()) {
if (Canonical != D) {
comments::FullComment *FC = Pos->second;
comments::FullComment *CFC = cloneFullComment(FC, D);
return CFC;
}
return Pos->second;
}
const Decl *OriginalDecl = nullptr;
const RawComment *RC = getRawCommentForAnyRedecl(D, &OriginalDecl);
if (!RC) {
if (isa<ObjCMethodDecl>(D) || isa<FunctionDecl>(D)) {
SmallVector<const NamedDecl*, 8> Overridden;
const auto *OMD = dyn_cast<ObjCMethodDecl>(D);
if (OMD && OMD->isPropertyAccessor())
if (const ObjCPropertyDecl *PDecl = OMD->findPropertyDecl())
if (comments::FullComment *FC = getCommentForDecl(PDecl, PP))
return cloneFullComment(FC, D);
if (OMD)
addRedeclaredMethods(OMD, Overridden);
getOverriddenMethods(dyn_cast<NamedDecl>(D), Overridden);
for (unsigned i = 0, e = Overridden.size(); i < e; i++)
if (comments::FullComment *FC = getCommentForDecl(Overridden[i], PP))
return cloneFullComment(FC, D);
}
else if (const auto *TD = dyn_cast<TypedefNameDecl>(D)) {
// Attach any tag type's documentation to its typedef if the latter
// does not have one of its own.
QualType QT = TD->getUnderlyingType();
if (const auto *TT = QT->getAs<TagType>())
if (const Decl *TD = TT->getDecl())
if (comments::FullComment *FC = getCommentForDecl(TD, PP))
return cloneFullComment(FC, D);
}
else if (const auto *IC = dyn_cast<ObjCInterfaceDecl>(D)) {
while (IC->getSuperClass()) {
IC = IC->getSuperClass();
if (comments::FullComment *FC = getCommentForDecl(IC, PP))
return cloneFullComment(FC, D);
}
}
else if (const auto *CD = dyn_cast<ObjCCategoryDecl>(D)) {
if (const ObjCInterfaceDecl *IC = CD->getClassInterface())
if (comments::FullComment *FC = getCommentForDecl(IC, PP))
return cloneFullComment(FC, D);
}
else if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
if (!(RD = RD->getDefinition()))
return nullptr;
// Check non-virtual bases.
for (const auto &I : RD->bases()) {
if (I.isVirtual() || (I.getAccessSpecifier() != AS_public))
continue;
QualType Ty = I.getType();
if (Ty.isNull())
continue;
if (const CXXRecordDecl *NonVirtualBase = Ty->getAsCXXRecordDecl()) {
if (!(NonVirtualBase = NonVirtualBase->getDefinition()))
continue;
if (comments::FullComment *FC = getCommentForDecl((NonVirtualBase), PP))
return cloneFullComment(FC, D);
}
}
// Check virtual bases.
for (const auto &I : RD->vbases()) {
if (I.getAccessSpecifier() != AS_public)
continue;
QualType Ty = I.getType();
if (Ty.isNull())
continue;
if (const CXXRecordDecl *VirtualBase = Ty->getAsCXXRecordDecl()) {
if (!(VirtualBase = VirtualBase->getDefinition()))
continue;
if (comments::FullComment *FC = getCommentForDecl((VirtualBase), PP))
return cloneFullComment(FC, D);
}
}
}
return nullptr;
}
// If the RawComment was attached to other redeclaration of this Decl, we
// should parse the comment in context of that other Decl. This is important
// because comments can contain references to parameter names which can be
// different across redeclarations.
if (D != OriginalDecl && OriginalDecl)
return getCommentForDecl(OriginalDecl, PP);
comments::FullComment *FC = RC->parse(*this, PP, D);
ParsedComments[Canonical] = FC;
return FC;
}
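// Illustrative example (sketch) of the comment "inheritance" above:
//   struct Base {
//     /// Frobnicates the widget.
//     virtual void frob();
//   };
//   struct Derived : Base { void frob() override; };  // no comment of its own
// Asking for the comment of Derived::frob() falls back to the overridden
// Base::frob(); Objective-C accessors similarly fall back to their property,
// and typedefs to their underlying tag type.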
void
ASTContext::CanonicalTemplateTemplateParm::Profile(llvm::FoldingSetNodeID &ID,
const ASTContext &C,
TemplateTemplateParmDecl *Parm) {
ID.AddInteger(Parm->getDepth());
ID.AddInteger(Parm->getPosition());
ID.AddBoolean(Parm->isParameterPack());
TemplateParameterList *Params = Parm->getTemplateParameters();
ID.AddInteger(Params->size());
for (TemplateParameterList::const_iterator P = Params->begin(),
PEnd = Params->end();
P != PEnd; ++P) {
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(*P)) {
ID.AddInteger(0);
ID.AddBoolean(TTP->isParameterPack());
const TypeConstraint *TC = TTP->getTypeConstraint();
ID.AddBoolean(TC != nullptr);
if (TC)
TC->getImmediatelyDeclaredConstraint()->Profile(ID, C,
/*Canonical=*/true);
if (TTP->isExpandedParameterPack()) {
ID.AddBoolean(true);
ID.AddInteger(TTP->getNumExpansionParameters());
} else
ID.AddBoolean(false);
continue;
}
if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(*P)) {
ID.AddInteger(1);
ID.AddBoolean(NTTP->isParameterPack());
const Expr *TC = NTTP->getPlaceholderTypeConstraint();
ID.AddBoolean(TC != nullptr);
ID.AddPointer(NTTP->getType().getCanonicalType().getAsOpaquePtr());
if (TC)
TC->Profile(ID, C, /*Canonical=*/true);
if (NTTP->isExpandedParameterPack()) {
ID.AddBoolean(true);
ID.AddInteger(NTTP->getNumExpansionTypes());
for (unsigned I = 0, N = NTTP->getNumExpansionTypes(); I != N; ++I) {
QualType T = NTTP->getExpansionType(I);
ID.AddPointer(T.getCanonicalType().getAsOpaquePtr());
}
} else
ID.AddBoolean(false);
continue;
}
auto *TTP = cast<TemplateTemplateParmDecl>(*P);
ID.AddInteger(2);
Profile(ID, C, TTP);
}
Expr *RequiresClause = Parm->getTemplateParameters()->getRequiresClause();
ID.AddBoolean(RequiresClause != nullptr);
if (RequiresClause)
RequiresClause->Profile(ID, C, /*Canonical=*/true);
}
static Expr *
canonicalizeImmediatelyDeclaredConstraint(const ASTContext &C, Expr *IDC,
QualType ConstrainedType) {
// This is a bit ugly - we need to form a new immediately-declared
// constraint that references the new parameter; this would ideally
// require semantic analysis (e.g. template<C T> struct S {}; - the
// converted arguments of C<T> could be an argument pack if C is
// declared as template<typename... T> concept C = ...).
// We don't have semantic analysis here so we dig deep into the
// ready-made constraint expr and change the thing manually.
ConceptSpecializationExpr *CSE;
if (const auto *Fold = dyn_cast<CXXFoldExpr>(IDC))
CSE = cast<ConceptSpecializationExpr>(Fold->getLHS());
else
CSE = cast<ConceptSpecializationExpr>(IDC);
ArrayRef<TemplateArgument> OldConverted = CSE->getTemplateArguments();
SmallVector<TemplateArgument, 3> NewConverted;
NewConverted.reserve(OldConverted.size());
if (OldConverted.front().getKind() == TemplateArgument::Pack) {
// The case:
// template<typename... T> concept C = true;
// template<C<int> T> struct S; -> constraint is C<{T, int}>
NewConverted.push_back(ConstrainedType);
llvm::append_range(NewConverted,
OldConverted.front().pack_elements().drop_front(1));
TemplateArgument NewPack(NewConverted);
NewConverted.clear();
NewConverted.push_back(NewPack);
assert(OldConverted.size() == 1 &&
"Template parameter pack should be the last parameter");
} else {
assert(OldConverted.front().getKind() == TemplateArgument::Type &&
"Unexpected first argument kind for immediately-declared "
"constraint");
NewConverted.push_back(ConstrainedType);
llvm::append_range(NewConverted, OldConverted.drop_front(1));
}
auto *CSD = ImplicitConceptSpecializationDecl::Create(
C, CSE->getNamedConcept()->getDeclContext(),
CSE->getNamedConcept()->getLocation(), NewConverted);
Expr *NewIDC = ConceptSpecializationExpr::Create(
- C, CSE->getNamedConcept(), CSD, nullptr, CSE->isInstantiationDependent(),
+ C, CSE->getNamedConcept(), CSE->getTemplateArgsAsWritten(), CSD,
+ /*Satisfaction=*/nullptr, CSE->isInstantiationDependent(),
CSE->containsUnexpandedParameterPack());
if (auto *OrigFold = dyn_cast<CXXFoldExpr>(IDC))
NewIDC = new (C) CXXFoldExpr(
OrigFold->getType(), /*Callee*/ nullptr, SourceLocation(), NewIDC,
BinaryOperatorKind::BO_LAnd, SourceLocation(), /*RHS=*/nullptr,
SourceLocation(), /*NumExpansions=*/std::nullopt);
return NewIDC;
}
TemplateTemplateParmDecl *
ASTContext::getCanonicalTemplateTemplateParmDecl(
TemplateTemplateParmDecl *TTP) const {
// Check if we already have a canonical template template parameter.
llvm::FoldingSetNodeID ID;
CanonicalTemplateTemplateParm::Profile(ID, *this, TTP);
void *InsertPos = nullptr;
CanonicalTemplateTemplateParm *Canonical
= CanonTemplateTemplateParms.FindNodeOrInsertPos(ID, InsertPos);
if (Canonical)
return Canonical->getParam();
// Build a canonical template parameter list.
TemplateParameterList *Params = TTP->getTemplateParameters();
SmallVector<NamedDecl *, 4> CanonParams;
CanonParams.reserve(Params->size());
for (TemplateParameterList::const_iterator P = Params->begin(),
PEnd = Params->end();
P != PEnd; ++P) {
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(*P)) {
TemplateTypeParmDecl *NewTTP = TemplateTypeParmDecl::Create(
*this, getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
TTP->getDepth(), TTP->getIndex(), nullptr, false,
TTP->isParameterPack(), TTP->hasTypeConstraint(),
TTP->isExpandedParameterPack()
? std::optional<unsigned>(TTP->getNumExpansionParameters())
: std::nullopt);
if (const auto *TC = TTP->getTypeConstraint()) {
QualType ParamAsArgument(NewTTP->getTypeForDecl(), 0);
Expr *NewIDC = canonicalizeImmediatelyDeclaredConstraint(
*this, TC->getImmediatelyDeclaredConstraint(),
ParamAsArgument);
NewTTP->setTypeConstraint(
NestedNameSpecifierLoc(),
DeclarationNameInfo(TC->getNamedConcept()->getDeclName(),
SourceLocation()), /*FoundDecl=*/nullptr,
// Actually canonicalizing a TemplateArgumentLoc is difficult so we
// simply omit the ArgsAsWritten
TC->getNamedConcept(), /*ArgsAsWritten=*/nullptr, NewIDC);
}
CanonParams.push_back(NewTTP);
} else if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(*P)) {
QualType T = getCanonicalType(NTTP->getType());
TypeSourceInfo *TInfo = getTrivialTypeSourceInfo(T);
NonTypeTemplateParmDecl *Param;
if (NTTP->isExpandedParameterPack()) {
SmallVector<QualType, 2> ExpandedTypes;
SmallVector<TypeSourceInfo *, 2> ExpandedTInfos;
for (unsigned I = 0, N = NTTP->getNumExpansionTypes(); I != N; ++I) {
ExpandedTypes.push_back(getCanonicalType(NTTP->getExpansionType(I)));
ExpandedTInfos.push_back(
getTrivialTypeSourceInfo(ExpandedTypes.back()));
}
Param = NonTypeTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
SourceLocation(),
NTTP->getDepth(),
NTTP->getPosition(), nullptr,
T,
TInfo,
ExpandedTypes,
ExpandedTInfos);
} else {
Param = NonTypeTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
SourceLocation(),
NTTP->getDepth(),
NTTP->getPosition(), nullptr,
T,
NTTP->isParameterPack(),
TInfo);
}
if (AutoType *AT = T->getContainedAutoType()) {
if (AT->isConstrained()) {
Param->setPlaceholderTypeConstraint(
canonicalizeImmediatelyDeclaredConstraint(
*this, NTTP->getPlaceholderTypeConstraint(), T));
}
}
CanonParams.push_back(Param);
} else
CanonParams.push_back(getCanonicalTemplateTemplateParmDecl(
cast<TemplateTemplateParmDecl>(*P)));
}
Expr *CanonRequiresClause = nullptr;
if (Expr *RequiresClause = TTP->getTemplateParameters()->getRequiresClause())
CanonRequiresClause = RequiresClause;
TemplateTemplateParmDecl *CanonTTP
= TemplateTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(), TTP->getDepth(),
TTP->getPosition(),
TTP->isParameterPack(),
nullptr,
TemplateParameterList::Create(*this, SourceLocation(),
SourceLocation(),
CanonParams,
SourceLocation(),
CanonRequiresClause));
// Get the new insert position for the node we care about.
Canonical = CanonTemplateTemplateParms.FindNodeOrInsertPos(ID, InsertPos);
assert(!Canonical && "Shouldn't be in the map!");
(void)Canonical;
// Create the canonical template template parameter entry.
Canonical = new (*this) CanonicalTemplateTemplateParm(CanonTTP);
CanonTemplateTemplateParms.InsertNode(Canonical, InsertPos);
return CanonTTP;
}
TargetCXXABI::Kind ASTContext::getCXXABIKind() const {
auto Kind = getTargetInfo().getCXXABI().getKind();
return getLangOpts().CXXABI.value_or(Kind);
}
CXXABI *ASTContext::createCXXABI(const TargetInfo &T) {
if (!LangOpts.CPlusPlus) return nullptr;
switch (getCXXABIKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericARM: // Same as Itanium at this level
case TargetCXXABI::iOS:
case TargetCXXABI::WatchOS:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::XL:
return CreateItaniumCXXABI(*this);
case TargetCXXABI::Microsoft:
return CreateMicrosoftCXXABI(*this);
}
llvm_unreachable("Invalid CXXABI type!");
}
interp::Context &ASTContext::getInterpContext() {
if (!InterpContext) {
InterpContext.reset(new interp::Context(*this));
}
return *InterpContext.get();
}
ParentMapContext &ASTContext::getParentMapContext() {
if (!ParentMapCtx)
ParentMapCtx.reset(new ParentMapContext(*this));
return *ParentMapCtx.get();
}
static bool isAddrSpaceMapManglingEnabled(const TargetInfo &TI,
const LangOptions &LangOpts) {
switch (LangOpts.getAddressSpaceMapMangling()) {
case LangOptions::ASMM_Target:
return TI.useAddressSpaceMapMangling();
case LangOptions::ASMM_On:
return true;
case LangOptions::ASMM_Off:
return false;
}
llvm_unreachable("getAddressSpaceMapMangling() doesn't cover anything.");
}
ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM,
IdentifierTable &idents, SelectorTable &sels,
Builtin::Context &builtins, TranslationUnitKind TUKind)
: ConstantArrayTypes(this_(), ConstantArrayTypesLog2InitSize),
FunctionProtoTypes(this_(), FunctionProtoTypesLog2InitSize),
TemplateSpecializationTypes(this_()),
DependentTemplateSpecializationTypes(this_()), AutoTypes(this_()),
SubstTemplateTemplateParmPacks(this_()),
CanonTemplateTemplateParms(this_()), SourceMgr(SM), LangOpts(LOpts),
NoSanitizeL(new NoSanitizeList(LangOpts.NoSanitizeFiles, SM)),
XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles,
LangOpts.XRayNeverInstrumentFiles,
LangOpts.XRayAttrListFiles, SM)),
ProfList(new ProfileList(LangOpts.ProfileListFiles, SM)),
PrintingPolicy(LOpts), Idents(idents), Selectors(sels),
BuiltinInfo(builtins), TUKind(TUKind), DeclarationNames(*this),
Comments(SM), CommentCommandTraits(BumpAlloc, LOpts.CommentOpts),
CompCategories(this_()), LastSDM(nullptr, 0) {
addTranslationUnitDecl();
}
void ASTContext::cleanup() {
// Release the DenseMaps associated with DeclContext objects.
// FIXME: Is this the ideal solution?
ReleaseDeclContextMaps();
// Call all of the deallocation functions on all of their targets.
for (auto &Pair : Deallocations)
(Pair.first)(Pair.second);
Deallocations.clear();
// ASTRecordLayout objects in ASTRecordLayouts must always be destroyed
// because they can contain DenseMaps.
for (llvm::DenseMap<const ObjCContainerDecl*,
const ASTRecordLayout*>::iterator
I = ObjCLayouts.begin(), E = ObjCLayouts.end(); I != E; )
// Increment in loop to prevent using deallocated memory.
if (auto *R = const_cast<ASTRecordLayout *>((I++)->second))
R->Destroy(*this);
ObjCLayouts.clear();
for (llvm::DenseMap<const RecordDecl*, const ASTRecordLayout*>::iterator
I = ASTRecordLayouts.begin(), E = ASTRecordLayouts.end(); I != E; ) {
// Increment in loop to prevent using deallocated memory.
if (auto *R = const_cast<ASTRecordLayout *>((I++)->second))
R->Destroy(*this);
}
ASTRecordLayouts.clear();
for (llvm::DenseMap<const Decl*, AttrVec*>::iterator A = DeclAttrs.begin(),
AEnd = DeclAttrs.end();
A != AEnd; ++A)
A->second->~AttrVec();
DeclAttrs.clear();
for (const auto &Value : ModuleInitializers)
Value.second->~PerModuleInitializers();
ModuleInitializers.clear();
}
ASTContext::~ASTContext() { cleanup(); }
void ASTContext::setTraversalScope(const std::vector<Decl *> &TopLevelDecls) {
TraversalScope = TopLevelDecls;
getParentMapContext().clear();
}
void ASTContext::AddDeallocation(void (*Callback)(void *), void *Data) const {
Deallocations.push_back({Callback, Data});
}
void
ASTContext::setExternalSource(IntrusiveRefCntPtr<ExternalASTSource> Source) {
ExternalSource = std::move(Source);
}
void ASTContext::PrintStats() const {
llvm::errs() << "\n*** AST Context Stats:\n";
llvm::errs() << " " << Types.size() << " types total.\n";
unsigned counts[] = {
#define TYPE(Name, Parent) 0,
#define ABSTRACT_TYPE(Name, Parent)
#include "clang/AST/TypeNodes.inc"
0 // Extra
};
for (unsigned i = 0, e = Types.size(); i != e; ++i) {
Type *T = Types[i];
counts[(unsigned)T->getTypeClass()]++;
}
unsigned Idx = 0;
unsigned TotalBytes = 0;
#define TYPE(Name, Parent) \
if (counts[Idx]) \
llvm::errs() << " " << counts[Idx] << " " << #Name \
<< " types, " << sizeof(Name##Type) << " each " \
<< "(" << counts[Idx] * sizeof(Name##Type) \
<< " bytes)\n"; \
TotalBytes += counts[Idx] * sizeof(Name##Type); \
++Idx;
#define ABSTRACT_TYPE(Name, Parent)
#include "clang/AST/TypeNodes.inc"
llvm::errs() << "Total bytes = " << TotalBytes << "\n";
// Implicit special member functions.
llvm::errs() << NumImplicitDefaultConstructorsDeclared << "/"
<< NumImplicitDefaultConstructors
<< " implicit default constructors created\n";
llvm::errs() << NumImplicitCopyConstructorsDeclared << "/"
<< NumImplicitCopyConstructors
<< " implicit copy constructors created\n";
if (getLangOpts().CPlusPlus)
llvm::errs() << NumImplicitMoveConstructorsDeclared << "/"
<< NumImplicitMoveConstructors
<< " implicit move constructors created\n";
llvm::errs() << NumImplicitCopyAssignmentOperatorsDeclared << "/"
<< NumImplicitCopyAssignmentOperators
<< " implicit copy assignment operators created\n";
if (getLangOpts().CPlusPlus)
llvm::errs() << NumImplicitMoveAssignmentOperatorsDeclared << "/"
<< NumImplicitMoveAssignmentOperators
<< " implicit move assignment operators created\n";
llvm::errs() << NumImplicitDestructorsDeclared << "/"
<< NumImplicitDestructors
<< " implicit destructors created\n";
if (ExternalSource) {
llvm::errs() << "\n";
ExternalSource->PrintStats();
}
BumpAlloc.PrintStats();
}
void ASTContext::mergeDefinitionIntoModule(NamedDecl *ND, Module *M,
bool NotifyListeners) {
if (NotifyListeners)
if (auto *Listener = getASTMutationListener())
Listener->RedefinedHiddenDefinition(ND, M);
MergedDefModules[cast<NamedDecl>(ND->getCanonicalDecl())].push_back(M);
}
void ASTContext::deduplicateMergedDefinitonsFor(NamedDecl *ND) {
auto It = MergedDefModules.find(cast<NamedDecl>(ND->getCanonicalDecl()));
if (It == MergedDefModules.end())
return;
auto &Merged = It->second;
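// Null out repeated modules, then erase the nulls so each module appears
// only once while preserving the order of first occurrences.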
llvm::DenseSet<Module*> Found;
for (Module *&M : Merged)
if (!Found.insert(M).second)
M = nullptr;
llvm::erase_value(Merged, nullptr);
}
ArrayRef<Module *>
ASTContext::getModulesWithMergedDefinition(const NamedDecl *Def) {
auto MergedIt =
MergedDefModules.find(cast<NamedDecl>(Def->getCanonicalDecl()));
if (MergedIt == MergedDefModules.end())
return std::nullopt;
return MergedIt->second;
}
void ASTContext::PerModuleInitializers::resolve(ASTContext &Ctx) {
if (LazyInitializers.empty())
return;
auto *Source = Ctx.getExternalSource();
assert(Source && "lazy initializers but no external source");
auto LazyInits = std::move(LazyInitializers);
LazyInitializers.clear();
for (auto ID : LazyInits)
Initializers.push_back(Source->GetExternalDecl(ID));
assert(LazyInitializers.empty() &&
"GetExternalDecl for lazy module initializer added more inits");
}
void ASTContext::addModuleInitializer(Module *M, Decl *D) {
// One special case: if we add a module initializer that imports another
// module, and that module's only initializer is an ImportDecl, simplify.
if (const auto *ID = dyn_cast<ImportDecl>(D)) {
auto It = ModuleInitializers.find(ID->getImportedModule());
// Maybe the ImportDecl does nothing at all. (Common case.)
if (It == ModuleInitializers.end())
return;
// Maybe the ImportDecl only imports another ImportDecl.
auto &Imported = *It->second;
if (Imported.Initializers.size() + Imported.LazyInitializers.size() == 1) {
Imported.resolve(*this);
auto *OnlyDecl = Imported.Initializers.front();
if (isa<ImportDecl>(OnlyDecl))
D = OnlyDecl;
}
}
auto *&Inits = ModuleInitializers[M];
if (!Inits)
Inits = new (*this) PerModuleInitializers;
Inits->Initializers.push_back(D);
}
void ASTContext::addLazyModuleInitializers(Module *M, ArrayRef<uint32_t> IDs) {
auto *&Inits = ModuleInitializers[M];
if (!Inits)
Inits = new (*this) PerModuleInitializers;
Inits->LazyInitializers.insert(Inits->LazyInitializers.end(),
IDs.begin(), IDs.end());
}
ArrayRef<Decl *> ASTContext::getModuleInitializers(Module *M) {
auto It = ModuleInitializers.find(M);
if (It == ModuleInitializers.end())
return std::nullopt;
auto *Inits = It->second;
Inits->resolve(*this);
return Inits->Initializers;
}
ExternCContextDecl *ASTContext::getExternCContextDecl() const {
if (!ExternCContext)
ExternCContext = ExternCContextDecl::Create(*this, getTranslationUnitDecl());
return ExternCContext;
}
BuiltinTemplateDecl *
ASTContext::buildBuiltinTemplateDecl(BuiltinTemplateKind BTK,
const IdentifierInfo *II) const {
auto *BuiltinTemplate =
BuiltinTemplateDecl::Create(*this, getTranslationUnitDecl(), II, BTK);
BuiltinTemplate->setImplicit();
getTranslationUnitDecl()->addDecl(BuiltinTemplate);
return BuiltinTemplate;
}
BuiltinTemplateDecl *
ASTContext::getMakeIntegerSeqDecl() const {
if (!MakeIntegerSeqDecl)
MakeIntegerSeqDecl = buildBuiltinTemplateDecl(BTK__make_integer_seq,
getMakeIntegerSeqName());
return MakeIntegerSeqDecl;
}
BuiltinTemplateDecl *
ASTContext::getTypePackElementDecl() const {
if (!TypePackElementDecl)
TypePackElementDecl = buildBuiltinTemplateDecl(BTK__type_pack_element,
getTypePackElementName());
return TypePackElementDecl;
}
RecordDecl *ASTContext::buildImplicitRecord(StringRef Name,
RecordDecl::TagKind TK) const {
SourceLocation Loc;
RecordDecl *NewDecl;
if (getLangOpts().CPlusPlus)
NewDecl = CXXRecordDecl::Create(*this, TK, getTranslationUnitDecl(), Loc,
Loc, &Idents.get(Name));
else
NewDecl = RecordDecl::Create(*this, TK, getTranslationUnitDecl(), Loc, Loc,
&Idents.get(Name));
NewDecl->setImplicit();
NewDecl->addAttr(TypeVisibilityAttr::CreateImplicit(
const_cast<ASTContext &>(*this), TypeVisibilityAttr::Default));
return NewDecl;
}
TypedefDecl *ASTContext::buildImplicitTypedef(QualType T,
StringRef Name) const {
TypeSourceInfo *TInfo = getTrivialTypeSourceInfo(T);
TypedefDecl *NewDecl = TypedefDecl::Create(
const_cast<ASTContext &>(*this), getTranslationUnitDecl(),
SourceLocation(), SourceLocation(), &Idents.get(Name), TInfo);
NewDecl->setImplicit();
return NewDecl;
}
TypedefDecl *ASTContext::getInt128Decl() const {
if (!Int128Decl)
Int128Decl = buildImplicitTypedef(Int128Ty, "__int128_t");
return Int128Decl;
}
TypedefDecl *ASTContext::getUInt128Decl() const {
if (!UInt128Decl)
UInt128Decl = buildImplicitTypedef(UnsignedInt128Ty, "__uint128_t");
return UInt128Decl;
}
void ASTContext::InitBuiltinType(CanQualType &R, BuiltinType::Kind K) {
auto *Ty = new (*this, TypeAlignment) BuiltinType(K);
R = CanQualType::CreateUnsafe(QualType(Ty, 0));
Types.push_back(Ty);
}
void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
const TargetInfo *AuxTarget) {
assert((!this->Target || this->Target == &Target) &&
"Incorrect target reinitialization");
assert(VoidTy.isNull() && "Context reinitialized?");
this->Target = &Target;
this->AuxTarget = AuxTarget;
ABI.reset(createCXXABI(Target));
AddrSpaceMapMangling = isAddrSpaceMapManglingEnabled(Target, LangOpts);
// C99 6.2.5p19.
InitBuiltinType(VoidTy, BuiltinType::Void);
// C99 6.2.5p2.
InitBuiltinType(BoolTy, BuiltinType::Bool);
// C99 6.2.5p3.
if (LangOpts.CharIsSigned)
InitBuiltinType(CharTy, BuiltinType::Char_S);
else
InitBuiltinType(CharTy, BuiltinType::Char_U);
// C99 6.2.5p4.
InitBuiltinType(SignedCharTy, BuiltinType::SChar);
InitBuiltinType(ShortTy, BuiltinType::Short);
InitBuiltinType(IntTy, BuiltinType::Int);
InitBuiltinType(LongTy, BuiltinType::Long);
InitBuiltinType(LongLongTy, BuiltinType::LongLong);
// C99 6.2.5p6.
InitBuiltinType(UnsignedCharTy, BuiltinType::UChar);
InitBuiltinType(UnsignedShortTy, BuiltinType::UShort);
InitBuiltinType(UnsignedIntTy, BuiltinType::UInt);
InitBuiltinType(UnsignedLongTy, BuiltinType::ULong);
InitBuiltinType(UnsignedLongLongTy, BuiltinType::ULongLong);
// C99 6.2.5p10.
InitBuiltinType(FloatTy, BuiltinType::Float);
InitBuiltinType(DoubleTy, BuiltinType::Double);
InitBuiltinType(LongDoubleTy, BuiltinType::LongDouble);
// GNU extension, __float128 for IEEE quadruple precision
InitBuiltinType(Float128Ty, BuiltinType::Float128);
// __ibm128 for IBM extended precision
InitBuiltinType(Ibm128Ty, BuiltinType::Ibm128);
// C11 extension ISO/IEC TS 18661-3
InitBuiltinType(Float16Ty, BuiltinType::Float16);
// ISO/IEC JTC1 SC22 WG14 N1169 Extension
InitBuiltinType(ShortAccumTy, BuiltinType::ShortAccum);
InitBuiltinType(AccumTy, BuiltinType::Accum);
InitBuiltinType(LongAccumTy, BuiltinType::LongAccum);
InitBuiltinType(UnsignedShortAccumTy, BuiltinType::UShortAccum);
InitBuiltinType(UnsignedAccumTy, BuiltinType::UAccum);
InitBuiltinType(UnsignedLongAccumTy, BuiltinType::ULongAccum);
InitBuiltinType(ShortFractTy, BuiltinType::ShortFract);
InitBuiltinType(FractTy, BuiltinType::Fract);
InitBuiltinType(LongFractTy, BuiltinType::LongFract);
InitBuiltinType(UnsignedShortFractTy, BuiltinType::UShortFract);
InitBuiltinType(UnsignedFractTy, BuiltinType::UFract);
InitBuiltinType(UnsignedLongFractTy, BuiltinType::ULongFract);
InitBuiltinType(SatShortAccumTy, BuiltinType::SatShortAccum);
InitBuiltinType(SatAccumTy, BuiltinType::SatAccum);
InitBuiltinType(SatLongAccumTy, BuiltinType::SatLongAccum);
InitBuiltinType(SatUnsignedShortAccumTy, BuiltinType::SatUShortAccum);
InitBuiltinType(SatUnsignedAccumTy, BuiltinType::SatUAccum);
InitBuiltinType(SatUnsignedLongAccumTy, BuiltinType::SatULongAccum);
InitBuiltinType(SatShortFractTy, BuiltinType::SatShortFract);
InitBuiltinType(SatFractTy, BuiltinType::SatFract);
InitBuiltinType(SatLongFractTy, BuiltinType::SatLongFract);
InitBuiltinType(SatUnsignedShortFractTy, BuiltinType::SatUShortFract);
InitBuiltinType(SatUnsignedFractTy, BuiltinType::SatUFract);
InitBuiltinType(SatUnsignedLongFractTy, BuiltinType::SatULongFract);
// GNU extension, 128-bit integers.
InitBuiltinType(Int128Ty, BuiltinType::Int128);
InitBuiltinType(UnsignedInt128Ty, BuiltinType::UInt128);
// C++ 3.9.1p5
if (TargetInfo::isTypeSigned(Target.getWCharType()))
InitBuiltinType(WCharTy, BuiltinType::WChar_S);
else // -fshort-wchar makes wchar_t be unsigned.
InitBuiltinType(WCharTy, BuiltinType::WChar_U);
if (LangOpts.CPlusPlus && LangOpts.WChar)
WideCharTy = WCharTy;
else {
// C99 (or C++ using -fno-wchar).
WideCharTy = getFromTargetType(Target.getWCharType());
}
WIntTy = getFromTargetType(Target.getWIntType());
// C++20 (proposed)
InitBuiltinType(Char8Ty, BuiltinType::Char8);
if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
InitBuiltinType(Char16Ty, BuiltinType::Char16);
else // C99
Char16Ty = getFromTargetType(Target.getChar16Type());
if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
InitBuiltinType(Char32Ty, BuiltinType::Char32);
else // C99
Char32Ty = getFromTargetType(Target.getChar32Type());
// Placeholder type for type-dependent expressions whose type is
// completely unknown. No code should ever check a type against
// DependentTy and users should never see it; however, it is here to
// help diagnose failures to properly check for type-dependent
// expressions.
InitBuiltinType(DependentTy, BuiltinType::Dependent);
// Placeholder type for functions.
InitBuiltinType(OverloadTy, BuiltinType::Overload);
// Placeholder type for bound members.
InitBuiltinType(BoundMemberTy, BuiltinType::BoundMember);
// Placeholder type for pseudo-objects.
InitBuiltinType(PseudoObjectTy, BuiltinType::PseudoObject);
// "any" type; useful for debugger-like clients.
InitBuiltinType(UnknownAnyTy, BuiltinType::UnknownAny);
// Placeholder type for unbridged ARC casts.
InitBuiltinType(ARCUnbridgedCastTy, BuiltinType::ARCUnbridgedCast);
// Placeholder type for builtin functions.
InitBuiltinType(BuiltinFnTy, BuiltinType::BuiltinFn);
// Placeholder type for OMP array sections.
if (LangOpts.OpenMP) {
InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection);
InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping);
InitBuiltinType(OMPIteratorTy, BuiltinType::OMPIterator);
}
if (LangOpts.MatrixTypes)
InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx);
// Builtin types for 'id', 'Class', and 'SEL'.
InitBuiltinType(ObjCBuiltinIdTy, BuiltinType::ObjCId);
InitBuiltinType(ObjCBuiltinClassTy, BuiltinType::ObjCClass);
InitBuiltinType(ObjCBuiltinSelTy, BuiltinType::ObjCSel);
if (LangOpts.OpenCL) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/OpenCLImageTypes.def"
InitBuiltinType(OCLSamplerTy, BuiltinType::OCLSampler);
InitBuiltinType(OCLEventTy, BuiltinType::OCLEvent);
InitBuiltinType(OCLClkEventTy, BuiltinType::OCLClkEvent);
InitBuiltinType(OCLQueueTy, BuiltinType::OCLQueue);
InitBuiltinType(OCLReserveIDTy, BuiltinType::OCLReserveID);
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/OpenCLExtensionTypes.def"
}
if (Target.hasAArch64SVETypes()) {
#define SVE_TYPE(Name, Id, SingletonId) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/AArch64SVEACLETypes.def"
}
if (Target.getTriple().isPPC64()) {
#define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
#define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
}
if (Target.hasRISCVVTypes()) {
#define RVV_TYPE(Name, Id, SingletonId) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/RISCVVTypes.def"
}
// Builtin type for __objc_yes and __objc_no
ObjCBuiltinBoolTy = (Target.useSignedCharForObjCBool() ?
SignedCharTy : BoolTy);
ObjCConstantStringType = QualType();
ObjCSuperType = QualType();
// void * type
if (LangOpts.OpenCLGenericAddressSpace) {
auto Q = VoidTy.getQualifiers();
Q.setAddressSpace(LangAS::opencl_generic);
VoidPtrTy = getPointerType(getCanonicalType(
getQualifiedType(VoidTy.getUnqualifiedType(), Q)));
} else {
VoidPtrTy = getPointerType(VoidTy);
}
// nullptr type (C++0x 2.14.7)
InitBuiltinType(NullPtrTy, BuiltinType::NullPtr);
// half type (OpenCL 6.1.1.1) / ARM NEON __fp16
InitBuiltinType(HalfTy, BuiltinType::Half);
InitBuiltinType(BFloat16Ty, BuiltinType::BFloat16);
// Builtin type used to help define __builtin_va_list.
VaListTagDecl = nullptr;
// MSVC predeclares struct _GUID, and we need it to create MSGuidDecls.
if (LangOpts.MicrosoftExt || LangOpts.Borland) {
MSGuidTagDecl = buildImplicitRecord("_GUID");
getTranslationUnitDecl()->addDecl(MSGuidTagDecl);
}
}
DiagnosticsEngine &ASTContext::getDiagnostics() const {
return SourceMgr.getDiagnostics();
}
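/// Return the attribute vector associated with the given declaration,
/// lazily allocating it on first use.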
AttrVec& ASTContext::getDeclAttrs(const Decl *D) {
AttrVec *&Result = DeclAttrs[D];
if (!Result) {
void *Mem = Allocate(sizeof(AttrVec));
Result = new (Mem) AttrVec;
}
return *Result;
}
/// Erase the attributes corresponding to the given declaration.
void ASTContext::eraseDeclAttrs(const Decl *D) {
llvm::DenseMap<const Decl*, AttrVec*>::iterator Pos = DeclAttrs.find(D);
if (Pos != DeclAttrs.end()) {
Pos->second->~AttrVec();
DeclAttrs.erase(Pos);
}
}
// FIXME: Remove?
MemberSpecializationInfo *
ASTContext::getInstantiatedFromStaticDataMember(const VarDecl *Var) {
assert(Var->isStaticDataMember() && "Not a static data member");
return getTemplateOrSpecializationInfo(Var)
.dyn_cast<MemberSpecializationInfo *>();
}
ASTContext::TemplateOrSpecializationInfo
ASTContext::getTemplateOrSpecializationInfo(const VarDecl *Var) {
llvm::DenseMap<const VarDecl *, TemplateOrSpecializationInfo>::iterator Pos =
TemplateOrInstantiation.find(Var);
if (Pos == TemplateOrInstantiation.end())
return {};
return Pos->second;
}
void
ASTContext::setInstantiatedFromStaticDataMember(VarDecl *Inst, VarDecl *Tmpl,
TemplateSpecializationKind TSK,
SourceLocation PointOfInstantiation) {
assert(Inst->isStaticDataMember() && "Not a static data member");
assert(Tmpl->isStaticDataMember() && "Not a static data member");
setTemplateOrSpecializationInfo(Inst, new (*this) MemberSpecializationInfo(
Tmpl, TSK, PointOfInstantiation));
}
void
ASTContext::setTemplateOrSpecializationInfo(VarDecl *Inst,
TemplateOrSpecializationInfo TSI) {
assert(!TemplateOrInstantiation[Inst] &&
"Already noted what the variable was instantiated from");
TemplateOrInstantiation[Inst] = TSI;
}
NamedDecl *
ASTContext::getInstantiatedFromUsingDecl(NamedDecl *UUD) {
auto Pos = InstantiatedFromUsingDecl.find(UUD);
if (Pos == InstantiatedFromUsingDecl.end())
return nullptr;
return Pos->second;
}
void
ASTContext::setInstantiatedFromUsingDecl(NamedDecl *Inst, NamedDecl *Pattern) {
assert((isa<UsingDecl>(Pattern) ||
isa<UnresolvedUsingValueDecl>(Pattern) ||
isa<UnresolvedUsingTypenameDecl>(Pattern)) &&
"pattern decl is not a using decl");
assert((isa<UsingDecl>(Inst) ||
isa<UnresolvedUsingValueDecl>(Inst) ||
isa<UnresolvedUsingTypenameDecl>(Inst)) &&
"instantiation did not produce a using decl");
assert(!InstantiatedFromUsingDecl[Inst] && "pattern already exists");
InstantiatedFromUsingDecl[Inst] = Pattern;
}
UsingEnumDecl *
ASTContext::getInstantiatedFromUsingEnumDecl(UsingEnumDecl *UUD) {
auto Pos = InstantiatedFromUsingEnumDecl.find(UUD);
if (Pos == InstantiatedFromUsingEnumDecl.end())
return nullptr;
return Pos->second;
}
void ASTContext::setInstantiatedFromUsingEnumDecl(UsingEnumDecl *Inst,
UsingEnumDecl *Pattern) {
assert(!InstantiatedFromUsingEnumDecl[Inst] && "pattern already exists");
InstantiatedFromUsingEnumDecl[Inst] = Pattern;
}
UsingShadowDecl *
ASTContext::getInstantiatedFromUsingShadowDecl(UsingShadowDecl *Inst) {
llvm::DenseMap<UsingShadowDecl*, UsingShadowDecl*>::const_iterator Pos
= InstantiatedFromUsingShadowDecl.find(Inst);
if (Pos == InstantiatedFromUsingShadowDecl.end())
return nullptr;
return Pos->second;
}
void
ASTContext::setInstantiatedFromUsingShadowDecl(UsingShadowDecl *Inst,
UsingShadowDecl *Pattern) {
assert(!InstantiatedFromUsingShadowDecl[Inst] && "pattern already exists");
InstantiatedFromUsingShadowDecl[Inst] = Pattern;
}
FieldDecl *ASTContext::getInstantiatedFromUnnamedFieldDecl(FieldDecl *Field) {
llvm::DenseMap<FieldDecl *, FieldDecl *>::iterator Pos
= InstantiatedFromUnnamedFieldDecl.find(Field);
if (Pos == InstantiatedFromUnnamedFieldDecl.end())
return nullptr;
return Pos->second;
}
void ASTContext::setInstantiatedFromUnnamedFieldDecl(FieldDecl *Inst,
FieldDecl *Tmpl) {
assert(!Inst->getDeclName() && "Instantiated field decl is not unnamed");
assert(!Tmpl->getDeclName() && "Template field decl is not unnamed");
assert(!InstantiatedFromUnnamedFieldDecl[Inst] &&
"Already noted what unnamed field was instantiated from");
InstantiatedFromUnnamedFieldDecl[Inst] = Tmpl;
}
ASTContext::overridden_cxx_method_iterator
ASTContext::overridden_methods_begin(const CXXMethodDecl *Method) const {
return overridden_methods(Method).begin();
}
ASTContext::overridden_cxx_method_iterator
ASTContext::overridden_methods_end(const CXXMethodDecl *Method) const {
return overridden_methods(Method).end();
}
unsigned
ASTContext::overridden_methods_size(const CXXMethodDecl *Method) const {
auto Range = overridden_methods(Method);
return Range.end() - Range.begin();
}
ASTContext::overridden_method_range
ASTContext::overridden_methods(const CXXMethodDecl *Method) const {
llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
OverriddenMethods.find(Method->getCanonicalDecl());
if (Pos == OverriddenMethods.end())
return overridden_method_range(nullptr, nullptr);
return overridden_method_range(Pos->second.begin(), Pos->second.end());
}
void ASTContext::addOverriddenMethod(const CXXMethodDecl *Method,
const CXXMethodDecl *Overridden) {
assert(Method->isCanonicalDecl() && Overridden->isCanonicalDecl());
OverriddenMethods[Method].push_back(Overridden);
}
void ASTContext::getOverriddenMethods(
const NamedDecl *D,
SmallVectorImpl<const NamedDecl *> &Overridden) const {
assert(D);
if (const auto *CXXMethod = dyn_cast<CXXMethodDecl>(D)) {
Overridden.append(overridden_methods_begin(CXXMethod),
overridden_methods_end(CXXMethod));
return;
}
const auto *Method = dyn_cast<ObjCMethodDecl>(D);
if (!Method)
return;
SmallVector<const ObjCMethodDecl *, 8> OverDecls;
Method->getOverriddenMethods(OverDecls);
Overridden.append(OverDecls.begin(), OverDecls.end());
}
void ASTContext::addedLocalImportDecl(ImportDecl *Import) {
assert(!Import->getNextLocalImport() &&
"Import declaration already in the chain");
assert(!Import->isFromASTFile() && "Non-local import declaration");
if (!FirstLocalImport) {
FirstLocalImport = Import;
LastLocalImport = Import;
return;
}
LastLocalImport->setNextLocalImport(Import);
LastLocalImport = Import;
}
//===----------------------------------------------------------------------===//
// Type Sizing and Analysis
//===----------------------------------------------------------------------===//
/// getFloatTypeSemantics - Return the APFloat 'semantics' for the specified
/// scalar floating point type.
const llvm::fltSemantics &ASTContext::getFloatTypeSemantics(QualType T) const {
switch (T->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a floating point type!");
case BuiltinType::BFloat16:
return Target->getBFloat16Format();
case BuiltinType::Float16:
return Target->getHalfFormat();
case BuiltinType::Half:
// For HLSL, when the native half type is disabled, half will be treated as
// float.
if (getLangOpts().HLSL)
if (getLangOpts().NativeHalfType)
return Target->getHalfFormat();
else
return Target->getFloatFormat();
else
return Target->getHalfFormat();
case BuiltinType::Float: return Target->getFloatFormat();
case BuiltinType::Double: return Target->getDoubleFormat();
case BuiltinType::Ibm128:
return Target->getIbm128Format();
case BuiltinType::LongDouble:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
return AuxTarget->getLongDoubleFormat();
return Target->getLongDoubleFormat();
case BuiltinType::Float128:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
return AuxTarget->getFloat128Format();
return Target->getFloat128Format();
}
}
CharUnits ASTContext::getDeclAlign(const Decl *D, bool ForAlignof) const {
unsigned Align = Target->getCharWidth();
bool UseAlignAttrOnly = false;
if (unsigned AlignFromAttr = D->getMaxAlignment()) {
Align = AlignFromAttr;
// __attribute__((aligned)) can increase or decrease alignment
// *except* on a struct or struct member, where it only increases
// alignment unless 'packed' is also specified.
//
// It is an error for alignas to decrease alignment, so we can
// ignore that possibility; Sema should diagnose it.
if (isa<FieldDecl>(D)) {
UseAlignAttrOnly = D->hasAttr<PackedAttr>() ||
cast<FieldDecl>(D)->getParent()->hasAttr<PackedAttr>();
} else {
UseAlignAttrOnly = true;
}
}
else if (isa<FieldDecl>(D))
UseAlignAttrOnly =
D->hasAttr<PackedAttr>() ||
cast<FieldDecl>(D)->getParent()->hasAttr<PackedAttr>();
// If we're using the align attribute only, just ignore everything
// else about the declaration and its type.
if (UseAlignAttrOnly) {
// do nothing
} else if (const auto *VD = dyn_cast<ValueDecl>(D)) {
QualType T = VD->getType();
if (const auto *RT = T->getAs<ReferenceType>()) {
if (ForAlignof)
T = RT->getPointeeType();
else
T = getPointerType(RT->getPointeeType());
}
QualType BaseT = getBaseElementType(T);
if (T->isFunctionType())
Align = getTypeInfoImpl(T.getTypePtr()).Align;
else if (!BaseT->isIncompleteType()) {
// Adjust alignments of declarations with array type by the
// large-array alignment on the target.
if (const ArrayType *arrayType = getAsArrayType(T)) {
unsigned MinWidth = Target->getLargeArrayMinWidth();
if (!ForAlignof && MinWidth) {
if (isa<VariableArrayType>(arrayType))
Align = std::max(Align, Target->getLargeArrayAlign());
else if (isa<ConstantArrayType>(arrayType) &&
MinWidth <= getTypeSize(cast<ConstantArrayType>(arrayType)))
Align = std::max(Align, Target->getLargeArrayAlign());
}
}
Align = std::max(Align, getPreferredTypeAlign(T.getTypePtr()));
if (BaseT.getQualifiers().hasUnaligned())
Align = Target->getCharWidth();
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (VD->hasGlobalStorage() && !ForAlignof) {
uint64_t TypeSize = getTypeSize(T.getTypePtr());
Align = std::max(Align, getTargetInfo().getMinGlobalAlign(TypeSize));
}
}
}
// Fields can be subject to extra alignment constraints, like if
// the field is packed, the struct is packed, or the struct has a
// max-field-alignment constraint (#pragma pack). So calculate
// the actual alignment of the field within the struct, and then
// (as we're expected to) constrain that by the alignment of the type.
if (const auto *Field = dyn_cast<FieldDecl>(VD)) {
const RecordDecl *Parent = Field->getParent();
// We can only produce a sensible answer if the record is valid.
if (!Parent->isInvalidDecl()) {
const ASTRecordLayout &Layout = getASTRecordLayout(Parent);
// Start with the record's overall alignment.
unsigned FieldAlign = toBits(Layout.getAlignment());
// Use the GCD of that and the offset within the record.
uint64_t Offset = Layout.getFieldOffset(Field->getFieldIndex());
if (Offset > 0) {
// Alignment is always a power of 2, so the GCD will be a power of 2,
// which means we get to do this crazy thing instead of Euclid's.
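// For example, a field at bit offset 24 (0b11000) yields LowBitOfOffset = 8,
// so its guaranteed alignment within the record drops to 8 bits.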
uint64_t LowBitOfOffset = Offset & (~Offset + 1);
if (LowBitOfOffset < FieldAlign)
FieldAlign = static_cast<unsigned>(LowBitOfOffset);
}
Align = std::min(Align, FieldAlign);
}
}
}
// Some targets place a hard limit on the maximum alignment that can be
// requested with the aligned attribute for static variables.
const unsigned MaxAlignedAttr = getTargetInfo().getMaxAlignedAttribute();
const auto *VD = dyn_cast<VarDecl>(D);
if (MaxAlignedAttr && VD && VD->getStorageClass() == SC_Static)
Align = std::min(Align, MaxAlignedAttr);
return toCharUnitsFromBits(Align);
}
CharUnits ASTContext::getExnObjectAlignment() const {
return toCharUnitsFromBits(Target->getExnObjectAlignment());
}
// getTypeInfoDataSizeInChars - Return the size of a type, in
// chars. If the type is a record, its data size is returned. This is
// the size of the memcpy that's performed when assigning this type
// using a trivial copy/move assignment operator.
TypeInfoChars ASTContext::getTypeInfoDataSizeInChars(QualType T) const {
TypeInfoChars Info = getTypeInfoInChars(T);
// In C++, objects can sometimes be allocated into the tail padding
// of a base-class subobject. We decide whether that's possible
// during class layout, so here we can just trust the layout results.
if (getLangOpts().CPlusPlus) {
if (const auto *RT = T->getAs<RecordType>()) {
const ASTRecordLayout &layout = getASTRecordLayout(RT->getDecl());
Info.Width = layout.getDataSize();
}
}
return Info;
}
/// getConstantArrayInfoInChars - Performing the computation in CharUnits
/// instead of in bits prevents overflowing the uint64_t for some large arrays.
TypeInfoChars
static getConstantArrayInfoInChars(const ASTContext &Context,
const ConstantArrayType *CAT) {
TypeInfoChars EltInfo = Context.getTypeInfoInChars(CAT->getElementType());
uint64_t Size = CAT->getSize().getZExtValue();
assert((Size == 0 || static_cast<uint64_t>(EltInfo.Width.getQuantity()) <=
(uint64_t)(-1)/Size) &&
"Overflow in array type char size evaluation");
uint64_t Width = EltInfo.Width.getQuantity() * Size;
unsigned Align = EltInfo.Align.getQuantity();
if (!Context.getTargetInfo().getCXXABI().isMicrosoft() ||
Context.getTargetInfo().getPointerWidth(LangAS::Default) == 64)
Width = llvm::alignTo(Width, Align);
return TypeInfoChars(CharUnits::fromQuantity(Width),
CharUnits::fromQuantity(Align),
EltInfo.AlignRequirement);
}
TypeInfoChars ASTContext::getTypeInfoInChars(const Type *T) const {
if (const auto *CAT = dyn_cast<ConstantArrayType>(T))
return getConstantArrayInfoInChars(*this, CAT);
TypeInfo Info = getTypeInfo(T);
return TypeInfoChars(toCharUnitsFromBits(Info.Width),
toCharUnitsFromBits(Info.Align), Info.AlignRequirement);
}
TypeInfoChars ASTContext::getTypeInfoInChars(QualType T) const {
return getTypeInfoInChars(T.getTypePtr());
}
bool ASTContext::isPromotableIntegerType(QualType T) const {
// HLSL doesn't promote all small integer types to int; it
// just uses the rank-based promotion rules for all types.
if (getLangOpts().HLSL)
return false;
if (const auto *BT = T->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Bool:
case BuiltinType::Char_S:
case BuiltinType::Char_U:
case BuiltinType::SChar:
case BuiltinType::UChar:
case BuiltinType::Short:
case BuiltinType::UShort:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
return true;
default:
return false;
}
// Enumerated types are promotable to their compatible integer types
// (C99 6.3.1.1), a.k.a. their underlying types (C++ [conv.prom]p2).
if (const auto *ET = T->getAs<EnumType>()) {
if (T->isDependentType() || ET->getDecl()->getPromotionType().isNull() ||
ET->getDecl()->isScoped())
return false;
return true;
}
return false;
}
bool ASTContext::isAlignmentRequired(const Type *T) const {
return getTypeInfo(T).AlignRequirement != AlignRequirementKind::None;
}
bool ASTContext::isAlignmentRequired(QualType T) const {
return isAlignmentRequired(T.getTypePtr());
}
unsigned ASTContext::getTypeAlignIfKnown(QualType T,
bool NeedsPreferredAlignment) const {
// An alignment on a typedef overrides anything else.
if (const auto *TT = T->getAs<TypedefType>())
if (unsigned Align = TT->getDecl()->getMaxAlignment())
return Align;
// If we have an (array of) complete type, we're done.
T = getBaseElementType(T);
if (!T->isIncompleteType())
return NeedsPreferredAlignment ? getPreferredTypeAlign(T) : getTypeAlign(T);
// If we had an array type, its element type might be a typedef
// type with an alignment attribute.
if (const auto *TT = T->getAs<TypedefType>())
if (unsigned Align = TT->getDecl()->getMaxAlignment())
return Align;
// Otherwise, see if the declaration of the type had an attribute.
if (const auto *TT = T->getAs<TagType>())
return TT->getDecl()->getMaxAlignment();
return 0;
}
TypeInfo ASTContext::getTypeInfo(const Type *T) const {
TypeInfoMap::iterator I = MemoizedTypeInfo.find(T);
if (I != MemoizedTypeInfo.end())
return I->second;
// This call can invalidate MemoizedTypeInfo[T], so we need a second lookup.
TypeInfo TI = getTypeInfoImpl(T);
MemoizedTypeInfo[T] = TI;
return TI;
}
/// getTypeInfoImpl - Return the size of the specified type, in bits. This
/// method does not work on incomplete types.
///
/// FIXME: Pointers into different addr spaces could have different sizes and
/// alignment requirements: getPointerInfo should take an AddrSpace, this
/// should take a QualType, &c.
TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
uint64_t Width = 0;
unsigned Align = 8;
AlignRequirementKind AlignRequirement = AlignRequirementKind::None;
LangAS AS = LangAS::Default;
switch (T->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) \
case Type::Class: \
assert(!T->isDependentType() && "should not see dependent types here"); \
return getTypeInfo(cast<Class##Type>(T)->desugar().getTypePtr());
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Should not see dependent types");
case Type::FunctionNoProto:
case Type::FunctionProto:
// GCC extension: alignof(function) = 32 bits
Width = 0;
Align = 32;
break;
case Type::IncompleteArray:
case Type::VariableArray:
case Type::ConstantArray: {
// Model non-constant sized arrays as size zero, but track the alignment.
uint64_t Size = 0;
if (const auto *CAT = dyn_cast<ConstantArrayType>(T))
Size = CAT->getSize().getZExtValue();
TypeInfo EltInfo = getTypeInfo(cast<ArrayType>(T)->getElementType());
assert((Size == 0 || EltInfo.Width <= (uint64_t)(-1) / Size) &&
"Overflow in array type bit size evaluation");
Width = EltInfo.Width * Size;
Align = EltInfo.Align;
AlignRequirement = EltInfo.AlignRequirement;
if (!getTargetInfo().getCXXABI().isMicrosoft() ||
getTargetInfo().getPointerWidth(LangAS::Default) == 64)
Width = llvm::alignTo(Width, Align);
break;
}
case Type::ExtVector:
case Type::Vector: {
const auto *VT = cast<VectorType>(T);
TypeInfo EltInfo = getTypeInfo(VT->getElementType());
Width = VT->isExtVectorBoolType() ? VT->getNumElements()
: EltInfo.Width * VT->getNumElements();
// Enforce at least byte alignment.
Align = std::max<unsigned>(8, Width);
// If the alignment is not a power of 2, round up to the next power of 2.
// This happens for non-power-of-2 length vectors.
if (Align & (Align-1)) {
Align = llvm::NextPowerOf2(Align);
Width = llvm::alignTo(Width, Align);
}
// Adjust the alignment based on the target max.
uint64_t TargetVectorAlign = Target->getMaxVectorAlign();
if (TargetVectorAlign && TargetVectorAlign < Align)
Align = TargetVectorAlign;
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector)
// Adjust the alignment for fixed-length SVE vectors. This is important
// for non-power-of-2 vector lengths.
Align = 128;
else if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
// Adjust the alignment for fixed-length SVE predicates.
Align = 16;
break;
}
case Type::ConstantMatrix: {
const auto *MT = cast<ConstantMatrixType>(T);
TypeInfo ElementInfo = getTypeInfo(MT->getElementType());
// The internal layout of a matrix value is implementation defined.
// Initially be ABI compatible with arrays with respect to alignment and
// size.
Width = ElementInfo.Width * MT->getNumRows() * MT->getNumColumns();
Align = ElementInfo.Align;
break;
}
case Type::Builtin:
switch (cast<BuiltinType>(T)->getKind()) {
default: llvm_unreachable("Unknown builtin type!");
case BuiltinType::Void:
// GCC extension: alignof(void) = 8 bits.
Width = 0;
Align = 8;
break;
case BuiltinType::Bool:
Width = Target->getBoolWidth();
Align = Target->getBoolAlign();
break;
case BuiltinType::Char_S:
case BuiltinType::Char_U:
case BuiltinType::UChar:
case BuiltinType::SChar:
case BuiltinType::Char8:
Width = Target->getCharWidth();
Align = Target->getCharAlign();
break;
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
Width = Target->getWCharWidth();
Align = Target->getWCharAlign();
break;
case BuiltinType::Char16:
Width = Target->getChar16Width();
Align = Target->getChar16Align();
break;
case BuiltinType::Char32:
Width = Target->getChar32Width();
Align = Target->getChar32Align();
break;
case BuiltinType::UShort:
case BuiltinType::Short:
Width = Target->getShortWidth();
Align = Target->getShortAlign();
break;
case BuiltinType::UInt:
case BuiltinType::Int:
Width = Target->getIntWidth();
Align = Target->getIntAlign();
break;
case BuiltinType::ULong:
case BuiltinType::Long:
Width = Target->getLongWidth();
Align = Target->getLongAlign();
break;
case BuiltinType::ULongLong:
case BuiltinType::LongLong:
Width = Target->getLongLongWidth();
Align = Target->getLongLongAlign();
break;
case BuiltinType::Int128:
case BuiltinType::UInt128:
Width = 128;
Align = Target->getInt128Align();
break;
case BuiltinType::ShortAccum:
case BuiltinType::UShortAccum:
case BuiltinType::SatShortAccum:
case BuiltinType::SatUShortAccum:
Width = Target->getShortAccumWidth();
Align = Target->getShortAccumAlign();
break;
case BuiltinType::Accum:
case BuiltinType::UAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatUAccum:
Width = Target->getAccumWidth();
Align = Target->getAccumAlign();
break;
case BuiltinType::LongAccum:
case BuiltinType::ULongAccum:
case BuiltinType::SatLongAccum:
case BuiltinType::SatULongAccum:
Width = Target->getLongAccumWidth();
Align = Target->getLongAccumAlign();
break;
case BuiltinType::ShortFract:
case BuiltinType::UShortFract:
case BuiltinType::SatShortFract:
case BuiltinType::SatUShortFract:
Width = Target->getShortFractWidth();
Align = Target->getShortFractAlign();
break;
case BuiltinType::Fract:
case BuiltinType::UFract:
case BuiltinType::SatFract:
case BuiltinType::SatUFract:
Width = Target->getFractWidth();
Align = Target->getFractAlign();
break;
case BuiltinType::LongFract:
case BuiltinType::ULongFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatULongFract:
Width = Target->getLongFractWidth();
Align = Target->getLongFractAlign();
break;
case BuiltinType::BFloat16:
if (Target->hasBFloat16Type()) {
Width = Target->getBFloat16Width();
Align = Target->getBFloat16Align();
}
break;
case BuiltinType::Float16:
case BuiltinType::Half:
if (Target->hasFloat16Type() || !getLangOpts().OpenMP ||
!getLangOpts().OpenMPIsDevice) {
Width = Target->getHalfWidth();
Align = Target->getHalfAlign();
} else {
assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
"Expected OpenMP device compilation.");
Width = AuxTarget->getHalfWidth();
Align = AuxTarget->getHalfAlign();
}
break;
case BuiltinType::Float:
Width = Target->getFloatWidth();
Align = Target->getFloatAlign();
break;
case BuiltinType::Double:
Width = Target->getDoubleWidth();
Align = Target->getDoubleAlign();
break;
case BuiltinType::Ibm128:
Width = Target->getIbm128Width();
Align = Target->getIbm128Align();
break;
case BuiltinType::LongDouble:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
(Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
Width = AuxTarget->getLongDoubleWidth();
Align = AuxTarget->getLongDoubleAlign();
} else {
Width = Target->getLongDoubleWidth();
Align = Target->getLongDoubleAlign();
}
break;
case BuiltinType::Float128:
if (Target->hasFloat128Type() || !getLangOpts().OpenMP ||
!getLangOpts().OpenMPIsDevice) {
Width = Target->getFloat128Width();
Align = Target->getFloat128Align();
} else {
assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
"Expected OpenMP device compilation.");
Width = AuxTarget->getFloat128Width();
Align = AuxTarget->getFloat128Align();
}
break;
case BuiltinType::NullPtr:
// C++ 3.9.1p11: sizeof(nullptr_t) == sizeof(void*)
Width = Target->getPointerWidth(LangAS::Default);
Align = Target->getPointerAlign(LangAS::Default);
break;
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
Width = Target->getPointerWidth(LangAS::Default);
Align = Target->getPointerAlign(LangAS::Default);
break;
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
AS = Target->getOpenCLTypeAddrSpace(getOpenCLTypeKind(T));
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
// The SVE types are effectively target-specific. The length of an
// SVE_VECTOR_TYPE is only known at runtime, but it is always a multiple
// of 128 bits. There is one predicate bit for each vector byte, so the
// length of an SVE_PREDICATE_TYPE is always a multiple of 16 bits.
//
// Because the length is only known at runtime, we use a dummy value
// of 0 for the static length. The alignment values are those defined
// by the Procedure Call Standard for the Arm Architecture.
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \
IsSigned, IsFP, IsBF) \
case BuiltinType::Id: \
Width = 0; \
Align = 128; \
break;
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \
case BuiltinType::Id: \
Width = 0; \
Align = 16; \
break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id: \
Width = Size; \
Align = Size; \
break;
#include "clang/Basic/PPCTypes.def"
#define RVV_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, NF, IsSigned, \
IsFP) \
case BuiltinType::Id: \
Width = 0; \
Align = ElBits; \
break;
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, ElKind) \
case BuiltinType::Id: \
Width = 0; \
Align = 8; \
break;
#include "clang/Basic/RISCVVTypes.def"
}
break;
case Type::ObjCObjectPointer:
Width = Target->getPointerWidth(LangAS::Default);
Align = Target->getPointerAlign(LangAS::Default);
break;
case Type::BlockPointer:
AS = cast<BlockPointerType>(T)->getPointeeType().getAddressSpace();
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::LValueReference:
case Type::RValueReference:
// alignof and sizeof should never enter this code path here, so we go
// the pointer route.
AS = cast<ReferenceType>(T)->getPointeeType().getAddressSpace();
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::Pointer:
AS = cast<PointerType>(T)->getPointeeType().getAddressSpace();
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::MemberPointer: {
const auto *MPT = cast<MemberPointerType>(T);
CXXABI::MemberPointerInfo MPI = ABI->getMemberPointerInfo(MPT);
Width = MPI.Width;
Align = MPI.Align;
break;
}
case Type::Complex: {
// Complex types have the same alignment as their elements, but twice the
// size.
TypeInfo EltInfo = getTypeInfo(cast<ComplexType>(T)->getElementType());
Width = EltInfo.Width * 2;
Align = EltInfo.Align;
break;
}
case Type::ObjCObject:
return getTypeInfo(cast<ObjCObjectType>(T)->getBaseType().getTypePtr());
case Type::Adjusted:
case Type::Decayed:
return getTypeInfo(cast<AdjustedType>(T)->getAdjustedType().getTypePtr());
case Type::ObjCInterface: {
const auto *ObjCI = cast<ObjCInterfaceType>(T);
if (ObjCI->getDecl()->isInvalidDecl()) {
Width = 8;
Align = 8;
break;
}
const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
Width = toBits(Layout.getSize());
Align = toBits(Layout.getAlignment());
break;
}
case Type::BitInt: {
const auto *EIT = cast<BitIntType>(T);
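// Use the bit width rounded up to a power of two as the alignment, clamped
// to at least the width of a char and at most the alignment of long long;
// the width is then padded to a multiple of that alignment.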
Align =
std::min(static_cast<unsigned>(std::max(
getCharWidth(), llvm::PowerOf2Ceil(EIT->getNumBits()))),
Target->getLongLongAlign());
Width = llvm::alignTo(EIT->getNumBits(), Align);
break;
}
case Type::Record:
case Type::Enum: {
const auto *TT = cast<TagType>(T);
if (TT->getDecl()->isInvalidDecl()) {
Width = 8;
Align = 8;
break;
}
if (const auto *ET = dyn_cast<EnumType>(TT)) {
const EnumDecl *ED = ET->getDecl();
TypeInfo Info =
getTypeInfo(ED->getIntegerType()->getUnqualifiedDesugaredType());
if (unsigned AttrAlign = ED->getMaxAlignment()) {
Info.Align = AttrAlign;
Info.AlignRequirement = AlignRequirementKind::RequiredByEnum;
}
return Info;
}
const auto *RT = cast<RecordType>(TT);
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getASTRecordLayout(RD);
Width = toBits(Layout.getSize());
Align = toBits(Layout.getAlignment());
AlignRequirement = RD->hasAttr<AlignedAttr>()
? AlignRequirementKind::RequiredByRecord
: AlignRequirementKind::None;
break;
}
case Type::SubstTemplateTypeParm:
return getTypeInfo(cast<SubstTemplateTypeParmType>(T)->
getReplacementType().getTypePtr());
case Type::Auto:
case Type::DeducedTemplateSpecialization: {
const auto *A = cast<DeducedType>(T);
assert(!A->getDeducedType().isNull() &&
"cannot request the size of an undeduced or dependent auto type");
return getTypeInfo(A->getDeducedType().getTypePtr());
}
case Type::Paren:
return getTypeInfo(cast<ParenType>(T)->getInnerType().getTypePtr());
case Type::MacroQualified:
return getTypeInfo(
cast<MacroQualifiedType>(T)->getUnderlyingType().getTypePtr());
case Type::ObjCTypeParam:
return getTypeInfo(cast<ObjCTypeParamType>(T)->desugar().getTypePtr());
case Type::Using:
return getTypeInfo(cast<UsingType>(T)->desugar().getTypePtr());
case Type::Typedef: {
const auto *TT = cast<TypedefType>(T);
TypeInfo Info = getTypeInfo(TT->desugar().getTypePtr());
// If the typedef has an aligned attribute on it, it overrides any computed
// alignment we have. This violates the GCC documentation (which says that
// attribute(aligned) can only round up) but matches its implementation.
if (unsigned AttrAlign = TT->getDecl()->getMaxAlignment()) {
Align = AttrAlign;
AlignRequirement = AlignRequirementKind::RequiredByTypedef;
} else {
Align = Info.Align;
AlignRequirement = Info.AlignRequirement;
}
Width = Info.Width;
break;
}
case Type::Elaborated:
return getTypeInfo(cast<ElaboratedType>(T)->getNamedType().getTypePtr());
case Type::Attributed:
return getTypeInfo(
cast<AttributedType>(T)->getEquivalentType().getTypePtr());
case Type::BTFTagAttributed:
return getTypeInfo(
cast<BTFTagAttributedType>(T)->getWrappedType().getTypePtr());
case Type::Atomic: {
// Start with the base type information.
TypeInfo Info = getTypeInfo(cast<AtomicType>(T)->getValueType());
Width = Info.Width;
Align = Info.Align;
if (!Width) {
// An otherwise zero-sized type should still generate an
// atomic operation.
Width = Target->getCharWidth();
assert(Align);
} else if (Width <= Target->getMaxAtomicPromoteWidth()) {
// If the size of the type doesn't exceed the platform's max
// atomic promotion width, make the size and alignment more
// favorable to atomic operations:
// Round the size up to a power of 2.
if (!llvm::isPowerOf2_64(Width))
Width = llvm::NextPowerOf2(Width);
// Set the alignment equal to the size.
Align = static_cast<unsigned>(Width);
}
}
break;
case Type::Pipe:
Width = Target->getPointerWidth(LangAS::opencl_global);
Align = Target->getPointerAlign(LangAS::opencl_global);
break;
}
assert(llvm::isPowerOf2_32(Align) && "Alignment must be power of 2");
return TypeInfo(Width, Align, AlignRequirement);
}
unsigned ASTContext::getTypeUnadjustedAlign(const Type *T) const {
UnadjustedAlignMap::iterator I = MemoizedUnadjustedAlign.find(T);
if (I != MemoizedUnadjustedAlign.end())
return I->second;
unsigned UnadjustedAlign;
if (const auto *RT = T->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getASTRecordLayout(RD);
UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
} else if (const auto *ObjCI = T->getAs<ObjCInterfaceType>()) {
const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
} else {
UnadjustedAlign = getTypeAlign(T->getUnqualifiedDesugaredType());
}
MemoizedUnadjustedAlign[T] = UnadjustedAlign;
return UnadjustedAlign;
}
unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
return SimdAlign;
}
/// toCharUnitsFromBits - Convert a size in bits to a size in characters.
CharUnits ASTContext::toCharUnitsFromBits(int64_t BitSize) const {
return CharUnits::fromQuantity(BitSize / getCharWidth());
}
/// toBits - Convert a size in characters to a size in bits.
int64_t ASTContext::toBits(CharUnits CharSize) const {
return CharSize.getQuantity() * getCharWidth();
}
/// getTypeSizeInChars - Return the size of the specified type, in characters.
/// This method does not work on incomplete types.
CharUnits ASTContext::getTypeSizeInChars(QualType T) const {
return getTypeInfoInChars(T).Width;
}
CharUnits ASTContext::getTypeSizeInChars(const Type *T) const {
return getTypeInfoInChars(T).Width;
}
/// getTypeAlignInChars - Return the ABI-specified alignment of a type, in
/// characters. This method does not work on incomplete types.
CharUnits ASTContext::getTypeAlignInChars(QualType T) const {
return toCharUnitsFromBits(getTypeAlign(T));
}
CharUnits ASTContext::getTypeAlignInChars(const Type *T) const {
return toCharUnitsFromBits(getTypeAlign(T));
}
/// getTypeUnadjustedAlignInChars - Return the ABI-specified alignment of a
/// type, in characters, before alignment adjustments. This method does
/// not work on incomplete types.
CharUnits ASTContext::getTypeUnadjustedAlignInChars(QualType T) const {
return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
}
CharUnits ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const {
return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
}
/// getPreferredTypeAlign - Return the "preferred" alignment of the specified
/// type for the current target in bits. This can differ from the ABI
/// alignment in cases where it is beneficial for performance or backwards
/// compatibility to overalign a data type. (Note: despite the name,
/// the preferred alignment is ABI-impacting, and not an optimization.)
unsigned ASTContext::getPreferredTypeAlign(const Type *T) const {
TypeInfo TI = getTypeInfo(T);
unsigned ABIAlign = TI.Align;
T = T->getBaseElementTypeUnsafe();
// The preferred alignment of member pointers is that of a pointer.
if (T->isMemberPointerType())
return getPreferredTypeAlign(getPointerDiffType().getTypePtr());
if (!Target->allowsLargerPreferedTypeAlignment())
return ABIAlign;
if (const auto *RT = T->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
// When used as part of a typedef, or together with a 'packed' attribute,
// the 'aligned' attribute can be used to decrease alignment. Note that
// the 'packed' case is already taken into consideration when computing
// the alignment; we only need to handle the typedef case here.
if (TI.AlignRequirement == AlignRequirementKind::RequiredByTypedef ||
RD->isInvalidDecl())
return ABIAlign;
unsigned PreferredAlign = static_cast<unsigned>(
toBits(getASTRecordLayout(RD).PreferredAlignment));
assert(PreferredAlign >= ABIAlign &&
"PreferredAlign should be at least as large as ABIAlign.");
return PreferredAlign;
}
// Double (and, for targets supporting AIX `power` alignment, long double) and
// long long should be naturally aligned (despite requiring less alignment) if
// possible.
if (const auto *CT = T->getAs<ComplexType>())
T = CT->getElementType().getTypePtr();
if (const auto *ET = T->getAs<EnumType>())
T = ET->getDecl()->getIntegerType().getTypePtr();
if (T->isSpecificBuiltinType(BuiltinType::Double) ||
T->isSpecificBuiltinType(BuiltinType::LongLong) ||
T->isSpecificBuiltinType(BuiltinType::ULongLong) ||
(T->isSpecificBuiltinType(BuiltinType::LongDouble) &&
Target->defaultsToAIXPowerAlignment()))
// Don't increase the alignment if an alignment attribute was specified on a
// typedef declaration.
if (!TI.isAlignRequired())
return std::max(ABIAlign, (unsigned)getTypeSize(T));
return ABIAlign;
}
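// Illustrative, target-dependent example (not guaranteed on all platforms):
// on typical 32-bit x86 targets 'double' has a 4-byte ABI alignment but an
// 8-byte preferred alignment (LLVM data layout "f64:32:64"), so for DoubleTy
// this returns 64 bits while getTypeAlign returns 32. Records instead consult
// the precomputed PreferredAlignment in their layout, as handled above.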
/// getTargetDefaultAlignForAttributeAligned - Return the default alignment
/// for __attribute__((aligned)) on this target, to be used if no alignment
/// value is specified.
unsigned ASTContext::getTargetDefaultAlignForAttributeAligned() const {
return getTargetInfo().getDefaultAlignForAttributeAligned();
}
/// getAlignOfGlobalVar - Return the alignment in bits that should be given
/// to a global variable of the specified type.
unsigned ASTContext::getAlignOfGlobalVar(QualType T) const {
uint64_t TypeSize = getTypeSize(T.getTypePtr());
return std::max(getPreferredTypeAlign(T),
getTargetInfo().getMinGlobalAlign(TypeSize));
}
/// getAlignOfGlobalVarInChars - Return the alignment in characters that
/// should be given to a global variable of the specified type.
CharUnits ASTContext::getAlignOfGlobalVarInChars(QualType T) const {
return toCharUnitsFromBits(getAlignOfGlobalVar(T));
}
CharUnits ASTContext::getOffsetOfBaseWithVBPtr(const CXXRecordDecl *RD) const {
CharUnits Offset = CharUnits::Zero();
const ASTRecordLayout *Layout = &getASTRecordLayout(RD);
while (const CXXRecordDecl *Base = Layout->getBaseSharingVBPtr()) {
Offset += Layout->getBaseClassOffset(Base);
Layout = &getASTRecordLayout(Base);
}
return Offset;
}
CharUnits ASTContext::getMemberPointerPathAdjustment(const APValue &MP) const {
const ValueDecl *MPD = MP.getMemberPointerDecl();
CharUnits ThisAdjustment = CharUnits::Zero();
ArrayRef<const CXXRecordDecl*> Path = MP.getMemberPointerPath();
bool DerivedMember = MP.isMemberPointerToDerivedMember();
const CXXRecordDecl *RD = cast<CXXRecordDecl>(MPD->getDeclContext());
for (unsigned I = 0, N = Path.size(); I != N; ++I) {
const CXXRecordDecl *Base = RD;
const CXXRecordDecl *Derived = Path[I];
if (DerivedMember)
std::swap(Base, Derived);
ThisAdjustment += getASTRecordLayout(Derived).getBaseClassOffset(Base);
RD = Path[I];
}
if (DerivedMember)
ThisAdjustment = -ThisAdjustment;
return ThisAdjustment;
}
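// Conceptual sketch (hypothetical types, not from the original source):
//   struct A { int a; }; struct B { int b; }; struct C : A, B {};
//   int B::*pb = &B::b;
//   int C::*pc = pb;   // conversion records the derivation path {B}
// Each path step adds a base-class offset (here, of B within C) to
// ThisAdjustment; a pointer to a *derived* member walks the same path with
// Base/Derived swapped and the final sign flipped.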
/// DeepCollectObjCIvars -
/// This routine first collects all declared, but not synthesized, ivars from
/// the superclass chain, and then collects all ivars, including those
/// synthesized, for the current class. It is used when implementing the
/// current class, where all ivars, declared and synthesized, are known.
void ASTContext::DeepCollectObjCIvars(const ObjCInterfaceDecl *OI,
bool leafClass,
SmallVectorImpl<const ObjCIvarDecl*> &Ivars) const {
if (const ObjCInterfaceDecl *SuperClass = OI->getSuperClass())
DeepCollectObjCIvars(SuperClass, false, Ivars);
if (!leafClass) {
llvm::append_range(Ivars, OI->ivars());
} else {
auto *IDecl = const_cast<ObjCInterfaceDecl *>(OI);
for (const ObjCIvarDecl *Iv = IDecl->all_declared_ivar_begin(); Iv;
Iv = Iv->getNextIvar())
Ivars.push_back(Iv);
}
}
/// CollectInheritedProtocols - Collect all protocols in current class and
/// those inherited by it.
void ASTContext::CollectInheritedProtocols(const Decl *CDecl,
llvm::SmallPtrSet<ObjCProtocolDecl*, 8> &Protocols) {
if (const auto *OI = dyn_cast<ObjCInterfaceDecl>(CDecl)) {
// We can use protocol_iterator here instead of
// all_referenced_protocol_iterator since we are walking all categories.
for (auto *Proto : OI->all_referenced_protocols()) {
CollectInheritedProtocols(Proto, Protocols);
}
// Categories of this Interface.
for (const auto *Cat : OI->visible_categories())
CollectInheritedProtocols(Cat, Protocols);
if (ObjCInterfaceDecl *SD = OI->getSuperClass())
while (SD) {
CollectInheritedProtocols(SD, Protocols);
SD = SD->getSuperClass();
}
} else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(CDecl)) {
for (auto *Proto : OC->protocols()) {
CollectInheritedProtocols(Proto, Protocols);
}
} else if (const auto *OP = dyn_cast<ObjCProtocolDecl>(CDecl)) {
// Insert the protocol.
if (!Protocols.insert(
const_cast<ObjCProtocolDecl *>(OP->getCanonicalDecl())).second)
return;
for (auto *Proto : OP->protocols())
CollectInheritedProtocols(Proto, Protocols);
}
}
static bool unionHasUniqueObjectRepresentations(const ASTContext &Context,
const RecordDecl *RD) {
assert(RD->isUnion() && "Must be union type");
CharUnits UnionSize = Context.getTypeSizeInChars(RD->getTypeForDecl());
for (const auto *Field : RD->fields()) {
if (!Context.hasUniqueObjectRepresentations(Field->getType()))
return false;
CharUnits FieldSize = Context.getTypeSizeInChars(Field->getType());
if (FieldSize != UnionSize)
return false;
}
return !RD->field_empty();
}
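// For example (illustrative, assuming a 32-bit-int target):
//   union U1 { int a; unsigned b; };  // unique: every member covers the
//                                     // union's full size
//   union U2 { int a; char c; };      // not unique: 'c' leaves 3 bytes of
//                                     // padding
// An empty union also returns false, via the field_empty() test above.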
static int64_t getSubobjectOffset(const FieldDecl *Field,
const ASTContext &Context,
const clang::ASTRecordLayout & /*Layout*/) {
return Context.getFieldOffset(Field);
}
static int64_t getSubobjectOffset(const CXXRecordDecl *RD,
const ASTContext &Context,
const clang::ASTRecordLayout &Layout) {
return Context.toBits(Layout.getBaseClassOffset(RD));
}
static std::optional<int64_t>
structHasUniqueObjectRepresentations(const ASTContext &Context,
const RecordDecl *RD);
static std::optional<int64_t>
getSubobjectSizeInBits(const FieldDecl *Field, const ASTContext &Context) {
if (Field->getType()->isRecordType()) {
const RecordDecl *RD = Field->getType()->getAsRecordDecl();
if (!RD->isUnion())
return structHasUniqueObjectRepresentations(Context, RD);
}
// A _BitInt type may not be unique if it has padding bits,
// but if it is a bit-field the padding bits are not used.
bool IsBitIntType = Field->getType()->isBitIntType();
if (!Field->getType()->isReferenceType() && !IsBitIntType &&
!Context.hasUniqueObjectRepresentations(Field->getType()))
return std::nullopt;
int64_t FieldSizeInBits =
Context.toBits(Context.getTypeSizeInChars(Field->getType()));
if (Field->isBitField()) {
int64_t BitfieldSize = Field->getBitWidthValue(Context);
if (IsBitIntType) {
if ((unsigned)BitfieldSize >
cast<BitIntType>(Field->getType())->getNumBits())
return std::nullopt;
} else if (BitfieldSize > FieldSizeInBits) {
return std::nullopt;
}
FieldSizeInBits = BitfieldSize;
} else if (IsBitIntType &&
!Context.hasUniqueObjectRepresentations(Field->getType())) {
return std::nullopt;
}
return FieldSizeInBits;
}
static std::optional<int64_t>
getSubobjectSizeInBits(const CXXRecordDecl *RD, const ASTContext &Context) {
return structHasUniqueObjectRepresentations(Context, RD);
}
template <typename RangeT>
static std::optional<int64_t> structSubobjectsHaveUniqueObjectRepresentations(
const RangeT &Subobjects, int64_t CurOffsetInBits,
const ASTContext &Context, const clang::ASTRecordLayout &Layout) {
for (const auto *Subobject : Subobjects) {
std::optional<int64_t> SizeInBits =
getSubobjectSizeInBits(Subobject, Context);
if (!SizeInBits)
return std::nullopt;
if (*SizeInBits != 0) {
int64_t Offset = getSubobjectOffset(Subobject, Context, Layout);
if (Offset != CurOffsetInBits)
return std::nullopt;
CurOffsetInBits += *SizeInBits;
}
}
return CurOffsetInBits;
}
static std::optional<int64_t>
structHasUniqueObjectRepresentations(const ASTContext &Context,
const RecordDecl *RD) {
assert(!RD->isUnion() && "Must be struct/class type");
const auto &Layout = Context.getASTRecordLayout(RD);
int64_t CurOffsetInBits = 0;
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RD)) {
if (ClassDecl->isDynamicClass())
return std::nullopt;
SmallVector<CXXRecordDecl *, 4> Bases;
for (const auto &Base : ClassDecl->bases()) {
// Empty types can be inherited from, and non-empty types can potentially
// have tail padding, so just make sure there isn't an error.
Bases.emplace_back(Base.getType()->getAsCXXRecordDecl());
}
llvm::sort(Bases, [&](const CXXRecordDecl *L, const CXXRecordDecl *R) {
return Layout.getBaseClassOffset(L) < Layout.getBaseClassOffset(R);
});
std::optional<int64_t> OffsetAfterBases =
structSubobjectsHaveUniqueObjectRepresentations(Bases, CurOffsetInBits,
Context, Layout);
if (!OffsetAfterBases)
return std::nullopt;
CurOffsetInBits = *OffsetAfterBases;
}
std::optional<int64_t> OffsetAfterFields =
structSubobjectsHaveUniqueObjectRepresentations(
RD->fields(), CurOffsetInBits, Context, Layout);
if (!OffsetAfterFields)
return std::nullopt;
CurOffsetInBits = *OffsetAfterFields;
return CurOffsetInBits;
}
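// For example (illustrative, assuming a typical 32-bit-int target):
//   struct S1 { int a; int b; };   // subobjects at offsets 0 and 32;
//                                  // returns 64, matching sizeof -> unique
//   struct S2 { char c; int i; };  // 'i' sits at offset 32, not 8, so the
//                                  // padding gap yields std::nullopt
// The caller compares the returned running offset against the full type
// size, which additionally catches tail padding.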
bool ASTContext::hasUniqueObjectRepresentations(QualType Ty) const {
// C++17 [meta.unary.prop]:
// The predicate condition for a template specialization
// has_unique_object_representations<T> shall be
// satisfied if and only if:
// (9.1) - T is trivially copyable, and
// (9.2) - any two objects of type T with the same value have the same
// object representation, where two objects
// of array or non-union class type are considered to have the same value
// if their respective sequences of
// direct subobjects have the same values, and two objects of union type
// are considered to have the same
// value if they have the same active member and the corresponding members
// have the same value.
// The set of scalar types for which this condition holds is
// implementation-defined. [ Note: If a type has padding
// bits, the condition does not hold; otherwise, the condition holds true
// for unsigned integral types. -- end note ]
assert(!Ty.isNull() && "Null QualType sent to unique object rep check");
// Arrays are unique only if their element type is unique.
if (Ty->isArrayType())
return hasUniqueObjectRepresentations(getBaseElementType(Ty));
// (9.1) - T is trivially copyable...
if (!Ty.isTriviallyCopyableType(*this))
return false;
// All integrals and enums are unique.
if (Ty->isIntegralOrEnumerationType()) {
// Except _BitInt types that have padding bits.
if (const auto *BIT = dyn_cast<BitIntType>(Ty))
return getTypeSize(BIT) == BIT->getNumBits();
return true;
}
// All other pointers are unique.
if (Ty->isPointerType())
return true;
if (Ty->isMemberPointerType()) {
const auto *MPT = Ty->getAs<MemberPointerType>();
return !ABI->getMemberPointerInfo(MPT).HasPadding;
}
if (Ty->isRecordType()) {
const RecordDecl *Record = Ty->castAs<RecordType>()->getDecl();
if (Record->isInvalidDecl())
return false;
if (Record->isUnion())
return unionHasUniqueObjectRepresentations(*this, Record);
std::optional<int64_t> StructSize =
structHasUniqueObjectRepresentations(*this, Record);
return StructSize && *StructSize == static_cast<int64_t>(getTypeSize(Ty));
}
// FIXME: More cases to handle here (list by rsmith):
// vectors (careful about, eg, vector of 3 foo)
// _Complex int and friends
// _Atomic T
// Obj-C block pointers
// Obj-C object pointers
// and perhaps OpenCL's various builtin types (pipe, sampler_t, event_t,
// clk_event_t, queue_t, reserve_id_t)
// There are also Obj-C class types and the Obj-C selector type, but I think
// it makes sense for those to return false here.
return false;
}
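// e.g. (illustrative): unsigned int -> true; float -> false, since it falls
// through every case above; a _BitInt(17) whose storage rounds up to 32 bits
// -> false, because getTypeSize() exceeds getNumBits().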
unsigned ASTContext::CountNonClassIvars(const ObjCInterfaceDecl *OI) const {
unsigned count = 0;
// Count ivars declared in class extension.
for (const auto *Ext : OI->known_extensions())
count += Ext->ivar_size();
// Count ivars defined in this class's implementation. This
// includes synthesized ivars.
if (ObjCImplementationDecl *ImplDecl = OI->getImplementation())
count += ImplDecl->ivar_size();
return count;
}
bool ASTContext::isSentinelNullExpr(const Expr *E) {
if (!E)
return false;
// nullptr_t is always treated as null.
if (E->getType()->isNullPtrType()) return true;
if (E->getType()->isAnyPointerType() &&
E->IgnoreParenCasts()->isNullPointerConstant(*this,
Expr::NPC_ValueDependentIsNull))
return true;
// Unfortunately, __null has type 'int'.
if (isa<GNUNullExpr>(E)) return true;
return false;
}
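// Expressions recognized as sentinel nulls (illustrative):
//   nullptr        // nullptr_t
//   (void *)0      // pointer-typed null pointer constant
//   __null         // GNUNullExpr, even though its type is 'int'
// A bare integer literal 0 is rejected: it has type 'int' and is neither
// nullptr_t nor pointer-typed.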
/// Get the implementation of ObjCInterfaceDecl, or nullptr if none
/// exists.
ObjCImplementationDecl *ASTContext::getObjCImplementation(ObjCInterfaceDecl *D) {
llvm::DenseMap<ObjCContainerDecl*, ObjCImplDecl*>::iterator
I = ObjCImpls.find(D);
if (I != ObjCImpls.end())
return cast<ObjCImplementationDecl>(I->second);
return nullptr;
}
/// Get the implementation of ObjCCategoryDecl, or nullptr if none
/// exists.
ObjCCategoryImplDecl *ASTContext::getObjCImplementation(ObjCCategoryDecl *D) {
llvm::DenseMap<ObjCContainerDecl*, ObjCImplDecl*>::iterator
I = ObjCImpls.find(D);
if (I != ObjCImpls.end())
return cast<ObjCCategoryImplDecl>(I->second);
return nullptr;
}
/// Set the implementation of ObjCInterfaceDecl.
void ASTContext::setObjCImplementation(ObjCInterfaceDecl *IFaceD,
ObjCImplementationDecl *ImplD) {
assert(IFaceD && ImplD && "Passed null params");
ObjCImpls[IFaceD] = ImplD;
}
/// Set the implementation of ObjCCategoryDecl.
void ASTContext::setObjCImplementation(ObjCCategoryDecl *CatD,
ObjCCategoryImplDecl *ImplD) {
assert(CatD && ImplD && "Passed null params");
ObjCImpls[CatD] = ImplD;
}
const ObjCMethodDecl *
ASTContext::getObjCMethodRedeclaration(const ObjCMethodDecl *MD) const {
return ObjCMethodRedecls.lookup(MD);
}
void ASTContext::setObjCMethodRedeclaration(const ObjCMethodDecl *MD,
const ObjCMethodDecl *Redecl) {
assert(!getObjCMethodRedeclaration(MD) && "MD already has a redeclaration");
ObjCMethodRedecls[MD] = Redecl;
}
const ObjCInterfaceDecl *ASTContext::getObjContainingInterface(
const NamedDecl *ND) const {
if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(ND->getDeclContext()))
return ID;
if (const auto *CD = dyn_cast<ObjCCategoryDecl>(ND->getDeclContext()))
return CD->getClassInterface();
if (const auto *IMD = dyn_cast<ObjCImplDecl>(ND->getDeclContext()))
return IMD->getClassInterface();
return nullptr;
}
/// Get the copy initialization expression of VarDecl, or nullptr if
/// none exists.
BlockVarCopyInit ASTContext::getBlockVarCopyInit(const VarDecl *VD) const {
assert(VD && "Passed null params");
assert(VD->hasAttr<BlocksAttr>() &&
"getBlockVarCopyInits - not __block var");
auto I = BlockVarCopyInits.find(VD);
if (I != BlockVarCopyInits.end())
return I->second;
return {nullptr, false};
}
/// Set the copy initialization expression of a block var decl.
void ASTContext::setBlockVarCopyInit(const VarDecl*VD, Expr *CopyExpr,
bool CanThrow) {
assert(VD && CopyExpr && "Passed null params");
assert(VD->hasAttr<BlocksAttr>() &&
"setBlockVarCopyInits - not __block var");
BlockVarCopyInits[VD].setExprAndFlag(CopyExpr, CanThrow);
}
TypeSourceInfo *ASTContext::CreateTypeSourceInfo(QualType T,
unsigned DataSize) const {
if (!DataSize)
DataSize = TypeLoc::getFullDataSizeForType(T);
else
assert(DataSize == TypeLoc::getFullDataSizeForType(T) &&
"incorrect data size provided to CreateTypeSourceInfo!");
auto *TInfo =
(TypeSourceInfo*)BumpAlloc.Allocate(sizeof(TypeSourceInfo) + DataSize, 8);
new (TInfo) TypeSourceInfo(T);
return TInfo;
}
TypeSourceInfo *ASTContext::getTrivialTypeSourceInfo(QualType T,
SourceLocation L) const {
TypeSourceInfo *DI = CreateTypeSourceInfo(T);
DI->getTypeLoc().initialize(const_cast<ASTContext &>(*this), L);
return DI;
}
const ASTRecordLayout &
ASTContext::getASTObjCInterfaceLayout(const ObjCInterfaceDecl *D) const {
return getObjCLayout(D, nullptr);
}
const ASTRecordLayout &
ASTContext::getASTObjCImplementationLayout(
const ObjCImplementationDecl *D) const {
return getObjCLayout(D->getClassInterface(), D);
}
static auto getCanonicalTemplateArguments(const ASTContext &C,
ArrayRef<TemplateArgument> Args,
bool &AnyNonCanonArgs) {
SmallVector<TemplateArgument, 16> CanonArgs(Args);
for (auto &Arg : CanonArgs) {
TemplateArgument OrigArg = Arg;
Arg = C.getCanonicalTemplateArgument(Arg);
AnyNonCanonArgs |= !Arg.structurallyEquals(OrigArg);
}
return CanonArgs;
}
//===----------------------------------------------------------------------===//
// Type creation/memoization methods
//===----------------------------------------------------------------------===//
QualType
ASTContext::getExtQualType(const Type *baseType, Qualifiers quals) const {
unsigned fastQuals = quals.getFastQualifiers();
quals.removeFastQualifiers();
// Check if we've already instantiated this type.
llvm::FoldingSetNodeID ID;
ExtQuals::Profile(ID, baseType, quals);
void *insertPos = nullptr;
if (ExtQuals *eq = ExtQualNodes.FindNodeOrInsertPos(ID, insertPos)) {
assert(eq->getQualifiers() == quals);
return QualType(eq, fastQuals);
}
// If the base type is not canonical, make the appropriate canonical type.
QualType canon;
if (!baseType->isCanonicalUnqualified()) {
SplitQualType canonSplit = baseType->getCanonicalTypeInternal().split();
canonSplit.Quals.addConsistentQualifiers(quals);
canon = getExtQualType(canonSplit.Ty, canonSplit.Quals);
// Re-find the insert position.
(void) ExtQualNodes.FindNodeOrInsertPos(ID, insertPos);
}
auto *eq = new (*this, TypeAlignment) ExtQuals(baseType, canon, quals);
ExtQualNodes.InsertNode(eq, insertPos);
return QualType(eq, fastQuals);
}
QualType ASTContext::getAddrSpaceQualType(QualType T,
LangAS AddressSpace) const {
QualType CanT = getCanonicalType(T);
if (CanT.getAddressSpace() == AddressSpace)
return T;
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode = Quals.strip(T);
// If this type already has an address space specified, it cannot get
// another one.
assert(!Quals.hasAddressSpace() &&
"Type cannot be in multiple addr spaces!");
Quals.addAddressSpace(AddressSpace);
return getExtQualType(TypeNode, Quals);
}
QualType ASTContext::removeAddrSpaceQualType(QualType T) const {
// If the type is not qualified with an address space, just return it
// immediately.
if (!T.hasAddressSpace())
return T;
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode;
while (T.hasAddressSpace()) {
TypeNode = Quals.strip(T);
// If the type no longer has an address space after stripping qualifiers,
// jump out.
if (!QualType(TypeNode, 0).hasAddressSpace())
break;
// There might be sugar in the way. Strip it and try again.
T = T.getSingleStepDesugaredType(*this);
}
Quals.removeAddressSpace();
// Removal of the address space can mean there are no longer any
// non-fast qualifiers, so creating an ExtQualType isn't possible (asserts)
// or required.
if (Quals.hasNonFastQualifiers())
return getExtQualType(TypeNode, Quals);
else
return QualType(TypeNode, Quals.getFastQualifiers());
}
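// e.g. (illustrative, OpenCL): '__global int' becomes plain 'int', and
// '__global volatile int' becomes 'volatile int' -- CVR qualifiers are
// "fast" qualifiers, so they survive on the returned QualType without
// needing a fresh ExtQuals node.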
QualType ASTContext::getObjCGCQualType(QualType T,
Qualifiers::GC GCAttr) const {
QualType CanT = getCanonicalType(T);
if (CanT.getObjCGCAttr() == GCAttr)
return T;
if (const auto *ptr = T->getAs<PointerType>()) {
QualType Pointee = ptr->getPointeeType();
if (Pointee->isAnyPointerType()) {
QualType ResultType = getObjCGCQualType(Pointee, GCAttr);
return getPointerType(ResultType);
}
}
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode = Quals.strip(T);
// If this type already has an ObjCGC specified, it cannot get
// another one.
assert(!Quals.hasObjCGCAttr() &&
"Type cannot have multiple ObjCGCs!");
Quals.addObjCGCAttr(GCAttr);
return getExtQualType(TypeNode, Quals);
}
QualType ASTContext::removePtrSizeAddrSpace(QualType T) const {
if (const PointerType *Ptr = T->getAs<PointerType>()) {
QualType Pointee = Ptr->getPointeeType();
if (isPtrSizeAddressSpace(Pointee.getAddressSpace())) {
return getPointerType(removeAddrSpaceQualType(Pointee));
}
}
return T;
}
const FunctionType *ASTContext::adjustFunctionType(const FunctionType *T,
FunctionType::ExtInfo Info) {
if (T->getExtInfo() == Info)
return T;
QualType Result;
if (const auto *FNPT = dyn_cast<FunctionNoProtoType>(T)) {
Result = getFunctionNoProtoType(FNPT->getReturnType(), Info);
} else {
const auto *FPT = cast<FunctionProtoType>(T);
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.ExtInfo = Info;
Result = getFunctionType(FPT->getReturnType(), FPT->getParamTypes(), EPI);
}
return cast<FunctionType>(Result.getTypePtr());
}
void ASTContext::adjustDeducedFunctionResultType(FunctionDecl *FD,
QualType ResultType) {
FD = FD->getMostRecentDecl();
while (true) {
const auto *FPT = FD->getType()->castAs<FunctionProtoType>();
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
FD->setType(getFunctionType(ResultType, FPT->getParamTypes(), EPI));
if (FunctionDecl *Next = FD->getPreviousDecl())
FD = Next;
else
break;
}
if (ASTMutationListener *L = getASTMutationListener())
L->DeducedReturnType(FD, ResultType);
}
/// Get a function type and produce the equivalent function type with the
/// specified exception specification. Type sugar that can be present on a
/// declaration of a function with an exception specification is permitted
/// and preserved. Other type sugar (for instance, typedefs) is not.
QualType ASTContext::getFunctionTypeWithExceptionSpec(
QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI) const {
// Might have some parens.
if (const auto *PT = dyn_cast<ParenType>(Orig))
return getParenType(
getFunctionTypeWithExceptionSpec(PT->getInnerType(), ESI));
// Might be wrapped in a macro qualified type.
if (const auto *MQT = dyn_cast<MacroQualifiedType>(Orig))
return getMacroQualifiedType(
getFunctionTypeWithExceptionSpec(MQT->getUnderlyingType(), ESI),
MQT->getMacroIdentifier());
// Might have a calling-convention attribute.
if (const auto *AT = dyn_cast<AttributedType>(Orig))
return getAttributedType(
AT->getAttrKind(),
getFunctionTypeWithExceptionSpec(AT->getModifiedType(), ESI),
getFunctionTypeWithExceptionSpec(AT->getEquivalentType(), ESI));
// Anything else must be a function type. Rebuild it with the new exception
// specification.
const auto *Proto = Orig->castAs<FunctionProtoType>();
return getFunctionType(
Proto->getReturnType(), Proto->getParamTypes(),
Proto->getExtProtoInfo().withExceptionSpec(ESI));
}
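/// Illustrative use (a sketch with a hypothetical 'Ctx' and a 'FnTy' naming
/// the type 'void ()'):
/// \code
///   FunctionProtoType::ExceptionSpecInfo ESI(EST_BasicNoexcept);
///   QualType NoexceptTy = Ctx.getFunctionTypeWithExceptionSpec(FnTy, ESI);
///   // NoexceptTy is 'void () noexcept'; parens, macro qualifiers, and
///   // calling-convention attributes on FnTy would be rebuilt around it.
/// \endcode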
bool ASTContext::hasSameFunctionTypeIgnoringExceptionSpec(QualType T,
QualType U) const {
return hasSameType(T, U) ||
(getLangOpts().CPlusPlus17 &&
hasSameType(getFunctionTypeWithExceptionSpec(T, EST_None),
getFunctionTypeWithExceptionSpec(U, EST_None)));
}
QualType ASTContext::getFunctionTypeWithoutPtrSizes(QualType T) {
if (const auto *Proto = T->getAs<FunctionProtoType>()) {
QualType RetTy = removePtrSizeAddrSpace(Proto->getReturnType());
SmallVector<QualType, 16> Args(Proto->param_types().size());
for (unsigned i = 0, n = Args.size(); i != n; ++i)
Args[i] = removePtrSizeAddrSpace(Proto->param_types()[i]);
return getFunctionType(RetTy, Args, Proto->getExtProtoInfo());
}
if (const FunctionNoProtoType *Proto = T->getAs<FunctionNoProtoType>()) {
QualType RetTy = removePtrSizeAddrSpace(Proto->getReturnType());
return getFunctionNoProtoType(RetTy, Proto->getExtInfo());
}
return T;
}
bool ASTContext::hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U) {
return hasSameType(T, U) ||
hasSameType(getFunctionTypeWithoutPtrSizes(T),
getFunctionTypeWithoutPtrSizes(U));
}
void ASTContext::adjustExceptionSpec(
FunctionDecl *FD, const FunctionProtoType::ExceptionSpecInfo &ESI,
bool AsWritten) {
// Update the type.
QualType Updated =
getFunctionTypeWithExceptionSpec(FD->getType(), ESI);
FD->setType(Updated);
if (!AsWritten)
return;
// Update the type in the type source information too.
if (TypeSourceInfo *TSInfo = FD->getTypeSourceInfo()) {
// If the type and the type-as-written differ, we may need to update
// the type-as-written too.
if (TSInfo->getType() != FD->getType())
Updated = getFunctionTypeWithExceptionSpec(TSInfo->getType(), ESI);
// FIXME: When we get proper type location information for exceptions,
// we'll also have to rebuild the TypeSourceInfo. For now, we just patch
// up the TypeSourceInfo.
assert(TypeLoc::getFullDataSizeForType(Updated) ==
TypeLoc::getFullDataSizeForType(TSInfo->getType()) &&
"TypeLoc size mismatch from updating exception specification");
TSInfo->overrideType(Updated);
}
}
/// getComplexType - Return the uniqued reference to the type for a complex
/// number with the specified element type.
QualType ASTContext::getComplexType(QualType T) const {
// Unique complex types, to guarantee there is only one complex type of a
// particular structure.
llvm::FoldingSetNodeID ID;
ComplexType::Profile(ID, T);
void *InsertPos = nullptr;
if (ComplexType *CT = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(CT, 0);
// If the pointee type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getComplexType(getCanonicalType(T));
// Get the new insert position for the node we care about.
ComplexType *NewIP = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) ComplexType(T, Canonical);
Types.push_back(New);
ComplexTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getPointerType - Return the uniqued reference to the type for a pointer to
/// the specified type.
QualType ASTContext::getPointerType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
PointerType::Profile(ID, T);
void *InsertPos = nullptr;
if (PointerType *PT = PointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pointee type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getPointerType(getCanonicalType(T));
// Get the new insert position for the node we care about.
PointerType *NewIP = PointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) PointerType(T, Canonical);
Types.push_back(New);
PointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
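// The memoization idiom above recurs throughout this file: profile the
// inputs into a FoldingSetNodeID, probe FindNodeOrInsertPos, and if the
// inputs are sugared, build the canonical node first (then re-probe, because
// the recursive call can invalidate InsertPos) before allocating the sugared
// node that points at it via its canonical-type field.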
QualType ASTContext::getAdjustedType(QualType Orig, QualType New) const {
llvm::FoldingSetNodeID ID;
AdjustedType::Profile(ID, Orig, New);
void *InsertPos = nullptr;
AdjustedType *AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (AT)
return QualType(AT, 0);
QualType Canonical = getCanonicalType(New);
// Get the new insert position for the node we care about.
AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!AT && "Shouldn't be in the map!");
AT = new (*this, TypeAlignment)
AdjustedType(Type::Adjusted, Orig, New, Canonical);
Types.push_back(AT);
AdjustedTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
QualType ASTContext::getDecayedType(QualType Orig, QualType Decayed) const {
llvm::FoldingSetNodeID ID;
AdjustedType::Profile(ID, Orig, Decayed);
void *InsertPos = nullptr;
AdjustedType *AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (AT)
return QualType(AT, 0);
QualType Canonical = getCanonicalType(Decayed);
// Get the new insert position for the node we care about.
AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!AT && "Shouldn't be in the map!");
AT = new (*this, TypeAlignment) DecayedType(Orig, Decayed, Canonical);
Types.push_back(AT);
AdjustedTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
QualType ASTContext::getDecayedType(QualType T) const {
assert((T->isArrayType() || T->isFunctionType()) && "T does not decay");
QualType Decayed;
// C99 6.7.5.3p7:
// A declaration of a parameter as "array of type" shall be
// adjusted to "qualified pointer to type", where the type
// qualifiers (if any) are those specified within the [ and ] of
// the array type derivation.
if (T->isArrayType())
Decayed = getArrayDecayedType(T);
// C99 6.7.5.3p8:
// A declaration of a parameter as "function returning type"
// shall be adjusted to "pointer to function returning type", as
// in 6.3.2.1.
if (T->isFunctionType())
Decayed = getPointerType(T);
return getDecayedType(T, Decayed);
}
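// e.g. (illustrative):
//   int[3]         decays to  int *
//   int (int)      decays to  int (*)(int)
//   const int[4]   decays to  const int *  // element qualifiers move to
//                                          // the pointee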
/// getBlockPointerType - Return the uniqued reference to the type for
/// a pointer to the specified block.
QualType ASTContext::getBlockPointerType(QualType T) const {
assert(T->isFunctionType() && "block of function types only");
// Unique pointers, to guarantee there is only one block of a particular
// structure.
llvm::FoldingSetNodeID ID;
BlockPointerType::Profile(ID, T);
void *InsertPos = nullptr;
if (BlockPointerType *PT =
BlockPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the block pointee type isn't canonical, this won't be a canonical
// type either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getBlockPointerType(getCanonicalType(T));
// Get the new insert position for the node we care about.
BlockPointerType *NewIP =
BlockPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) BlockPointerType(T, Canonical);
Types.push_back(New);
BlockPointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getLValueReferenceType - Return the uniqued reference to the type for an
/// lvalue reference to the specified type.
QualType
ASTContext::getLValueReferenceType(QualType T, bool SpelledAsLValue) const {
assert((!T->isPlaceholderType() ||
T->isSpecificPlaceholderType(BuiltinType::UnknownAny)) &&
"Unresolved placeholder type");
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
ReferenceType::Profile(ID, T, SpelledAsLValue);
void *InsertPos = nullptr;
if (LValueReferenceType *RT =
LValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(RT, 0);
const auto *InnerRef = T->getAs<ReferenceType>();
// If the referencee type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!SpelledAsLValue || InnerRef || !T.isCanonical()) {
QualType PointeeType = (InnerRef ? InnerRef->getPointeeType() : T);
Canonical = getLValueReferenceType(getCanonicalType(PointeeType));
// Get the new insert position for the node we care about.
LValueReferenceType *NewIP =
LValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) LValueReferenceType(T, Canonical,
SpelledAsLValue);
Types.push_back(New);
LValueReferenceTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getRValueReferenceType - Return the uniqued reference to the type for an
/// rvalue reference to the specified type.
QualType ASTContext::getRValueReferenceType(QualType T) const {
assert((!T->isPlaceholderType() ||
T->isSpecificPlaceholderType(BuiltinType::UnknownAny)) &&
"Unresolved placeholder type");
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
ReferenceType::Profile(ID, T, false);
void *InsertPos = nullptr;
if (RValueReferenceType *RT =
RValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(RT, 0);
const auto *InnerRef = T->getAs<ReferenceType>();
// If the referencee type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (InnerRef || !T.isCanonical()) {
QualType PointeeType = (InnerRef ? InnerRef->getPointeeType() : T);
Canonical = getRValueReferenceType(getCanonicalType(PointeeType));
// Get the new insert position for the node we care about.
RValueReferenceType *NewIP =
RValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) RValueReferenceType(T, Canonical);
Types.push_back(New);
RValueReferenceTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getMemberPointerType - Return the uniqued reference to the type for a
/// member pointer to the specified type, in the specified class.
QualType ASTContext::getMemberPointerType(QualType T, const Type *Cls) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
MemberPointerType::Profile(ID, T, Cls);
void *InsertPos = nullptr;
if (MemberPointerType *PT =
MemberPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pointee or class type isn't canonical, this won't be a canonical
// type either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical() || !Cls->isCanonicalUnqualified()) {
Canonical = getMemberPointerType(getCanonicalType(T), getCanonicalType(Cls));
// Get the new insert position for the node we care about.
MemberPointerType *NewIP =
MemberPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) MemberPointerType(T, Cls, Canonical);
Types.push_back(New);
MemberPointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getConstantArrayType - Return the unique reference to the type for an
/// array of the specified element type.
QualType ASTContext::getConstantArrayType(QualType EltTy,
const llvm::APInt &ArySizeIn,
const Expr *SizeExpr,
ArrayType::ArraySizeModifier ASM,
unsigned IndexTypeQuals) const {
assert((EltTy->isDependentType() ||
EltTy->isIncompleteType() || EltTy->isConstantSizeType()) &&
"Constant array of VLAs is illegal!");
// We only need the size as part of the type if it's instantiation-dependent.
if (SizeExpr && !SizeExpr->isInstantiationDependent())
SizeExpr = nullptr;
// Convert the array size into a canonical width matching the pointer size for
// the target.
llvm::APInt ArySize(ArySizeIn);
ArySize = ArySize.zextOrTrunc(Target->getMaxPointerWidth());
llvm::FoldingSetNodeID ID;
ConstantArrayType::Profile(ID, *this, EltTy, ArySize, SizeExpr, ASM,
IndexTypeQuals);
void *InsertPos = nullptr;
if (ConstantArrayType *ATP =
ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(ATP, 0);
// If the element type isn't canonical or has qualifiers, or the array bound
// is instantiation-dependent, this won't be a canonical type either, so fill
// in the canonical type field.
QualType Canon;
// FIXME: Check below should look for qualifiers behind sugar.
if (!EltTy.isCanonical() || EltTy.hasLocalQualifiers() || SizeExpr) {
SplitQualType canonSplit = getCanonicalType(EltTy).split();
Canon = getConstantArrayType(QualType(canonSplit.Ty, 0), ArySize, nullptr,
ASM, IndexTypeQuals);
Canon = getQualifiedType(Canon, canonSplit.Quals);
// Get the new insert position for the node we care about.
ConstantArrayType *NewIP =
ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
void *Mem = Allocate(
ConstantArrayType::totalSizeToAlloc<const Expr *>(SizeExpr ? 1 : 0),
TypeAlignment);
auto *New = new (Mem)
ConstantArrayType(EltTy, Canon, ArySize, SizeExpr, ASM, IndexTypeQuals);
ConstantArrayTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
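// e.g. (illustrative): for 'const int[4]' the canonical node is built from
// the unqualified element ('int[4]') and the 'const' is reapplied to the
// array itself via getQualifiedType -- canonically, qualifiers on array
// elements live on the array type.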
/// getVariableArrayDecayedType - Turns the given type, which may be
/// variably-modified, into the corresponding type with all the known
/// sizes replaced with [*].
QualType ASTContext::getVariableArrayDecayedType(QualType type) const {
// By far the most common case.
if (!type->isVariablyModifiedType()) return type;
QualType result;
SplitQualType split = type.getSplitDesugaredType();
const Type *ty = split.Ty;
switch (ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("didn't desugar past all non-canonical types?");
// These types should never be variably-modified.
case Type::Builtin:
case Type::Complex:
case Type::Vector:
case Type::DependentVector:
case Type::ExtVector:
case Type::DependentSizedExtVector:
case Type::ConstantMatrix:
case Type::DependentSizedMatrix:
case Type::DependentAddressSpace:
case Type::ObjCObject:
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
case Type::Record:
case Type::Enum:
case Type::UnresolvedUsing:
case Type::TypeOfExpr:
case Type::TypeOf:
case Type::Decltype:
case Type::UnaryTransform:
case Type::DependentName:
case Type::InjectedClassName:
case Type::TemplateSpecialization:
case Type::DependentTemplateSpecialization:
case Type::TemplateTypeParm:
case Type::SubstTemplateTypeParmPack:
case Type::Auto:
case Type::DeducedTemplateSpecialization:
case Type::PackExpansion:
case Type::BitInt:
case Type::DependentBitInt:
llvm_unreachable("type should never be variably-modified");
// These types can be variably-modified but should never need to
// further decay.
case Type::FunctionNoProto:
case Type::FunctionProto:
case Type::BlockPointer:
case Type::MemberPointer:
case Type::Pipe:
return type;
// These types can be variably-modified. All these modifications
// preserve structure except as noted by comments.
// TODO: if we ever care about optimizing VLAs, there are no-op
// optimizations available here.
case Type::Pointer:
result = getPointerType(getVariableArrayDecayedType(
cast<PointerType>(ty)->getPointeeType()));
break;
case Type::LValueReference: {
const auto *lv = cast<LValueReferenceType>(ty);
result = getLValueReferenceType(
getVariableArrayDecayedType(lv->getPointeeType()),
lv->isSpelledAsLValue());
break;
}
case Type::RValueReference: {
const auto *lv = cast<RValueReferenceType>(ty);
result = getRValueReferenceType(
getVariableArrayDecayedType(lv->getPointeeType()));
break;
}
case Type::Atomic: {
const auto *at = cast<AtomicType>(ty);
result = getAtomicType(getVariableArrayDecayedType(at->getValueType()));
break;
}
case Type::ConstantArray: {
const auto *cat = cast<ConstantArrayType>(ty);
result = getConstantArrayType(
getVariableArrayDecayedType(cat->getElementType()),
cat->getSize(),
cat->getSizeExpr(),
cat->getSizeModifier(),
cat->getIndexTypeCVRQualifiers());
break;
}
case Type::DependentSizedArray: {
const auto *dat = cast<DependentSizedArrayType>(ty);
result = getDependentSizedArrayType(
getVariableArrayDecayedType(dat->getElementType()),
dat->getSizeExpr(),
dat->getSizeModifier(),
dat->getIndexTypeCVRQualifiers(),
dat->getBracketsRange());
break;
}
// Turn incomplete array types into [*] types.
case Type::IncompleteArray: {
const auto *iat = cast<IncompleteArrayType>(ty);
result = getVariableArrayType(
getVariableArrayDecayedType(iat->getElementType()),
/*size*/ nullptr,
ArrayType::Normal,
iat->getIndexTypeCVRQualifiers(),
SourceRange());
break;
}
// Turn VLA types into [*] types.
case Type::VariableArray: {
const auto *vat = cast<VariableArrayType>(ty);
result = getVariableArrayType(
getVariableArrayDecayedType(vat->getElementType()),
/*size*/ nullptr,
ArrayType::Star,
vat->getIndexTypeCVRQualifiers(),
vat->getBracketsRange());
break;
}
}
// Apply the top-level qualifiers from the original.
return getQualifiedType(result, split.Quals);
}
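// e.g. (illustrative), with 'n' a runtime bound:
//   int[n]      becomes  int[*]
//   int[n][4]   becomes  int[*][4]   // constant inner dimension preserved
//   int (*)[n]  becomes  int (*)[*]  // pointer structure preserved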
/// getVariableArrayType - Returns a non-unique reference to the type for a
/// variable array of the specified element type.
QualType ASTContext::getVariableArrayType(QualType EltTy,
Expr *NumElts,
ArrayType::ArraySizeModifier ASM,
unsigned IndexTypeQuals,
SourceRange Brackets) const {
// Since we don't unique expressions, it isn't possible to unique VLA's
// that have an expression provided for their size.
QualType Canon;
// Be sure to pull qualifiers off the element type.
// FIXME: Check below should look for qualifiers behind sugar.
if (!EltTy.isCanonical() || EltTy.hasLocalQualifiers()) {
SplitQualType canonSplit = getCanonicalType(EltTy).split();
Canon = getVariableArrayType(QualType(canonSplit.Ty, 0), NumElts, ASM,
IndexTypeQuals, Brackets);
Canon = getQualifiedType(Canon, canonSplit.Quals);
}
auto *New = new (*this, TypeAlignment)
VariableArrayType(EltTy, Canon, NumElts, ASM, IndexTypeQuals, Brackets);
VariableArrayTypes.push_back(New);
Types.push_back(New);
return QualType(New, 0);
}
/// getDependentSizedArrayType - Returns a non-unique reference to
/// the type for a dependently-sized array of the specified element
/// type.
QualType ASTContext::getDependentSizedArrayType(QualType elementType,
Expr *numElements,
ArrayType::ArraySizeModifier ASM,
unsigned elementTypeQuals,
SourceRange brackets) const {
assert((!numElements || numElements->isTypeDependent() ||
numElements->isValueDependent()) &&
"Size must be type- or value-dependent!");
// Dependently-sized array types that do not have a specified number
// of elements will have their sizes deduced from a dependent
// initializer. We do no canonicalization here at all, which is okay
// because they can't be used in most locations.
if (!numElements) {
auto *newType
= new (*this, TypeAlignment)
DependentSizedArrayType(*this, elementType, QualType(),
numElements, ASM, elementTypeQuals,
brackets);
Types.push_back(newType);
return QualType(newType, 0);
}
// Otherwise, we actually build a new type every time, but we
// also build a canonical type.
SplitQualType canonElementType = getCanonicalType(elementType).split();
void *insertPos = nullptr;
llvm::FoldingSetNodeID ID;
DependentSizedArrayType::Profile(ID, *this,
QualType(canonElementType.Ty, 0),
ASM, elementTypeQuals, numElements);
// Look for an existing type with these properties.
DependentSizedArrayType *canonTy =
DependentSizedArrayTypes.FindNodeOrInsertPos(ID, insertPos);
// If we don't have one, build one.
if (!canonTy) {
canonTy = new (*this, TypeAlignment)
DependentSizedArrayType(*this, QualType(canonElementType.Ty, 0),
QualType(), numElements, ASM, elementTypeQuals,
brackets);
DependentSizedArrayTypes.InsertNode(canonTy, insertPos);
Types.push_back(canonTy);
}
// Apply qualifiers from the element type to the array.
QualType canon = getQualifiedType(QualType(canonTy, 0),
canonElementType.Quals);
// If we didn't need extra canonicalization for the element type or the size
// expression, then just use that as our result.
if (QualType(canonElementType.Ty, 0) == elementType &&
canonTy->getSizeExpr() == numElements)
return canon;
// Otherwise, we need to build a type which follows the spelling
// of the element type.
auto *sugaredType
= new (*this, TypeAlignment)
DependentSizedArrayType(*this, elementType, canon, numElements,
ASM, elementTypeQuals, brackets);
Types.push_back(sugaredType);
return QualType(sugaredType, 0);
}
QualType ASTContext::getIncompleteArrayType(QualType elementType,
ArrayType::ArraySizeModifier ASM,
unsigned elementTypeQuals) const {
llvm::FoldingSetNodeID ID;
IncompleteArrayType::Profile(ID, elementType, ASM, elementTypeQuals);
void *insertPos = nullptr;
if (IncompleteArrayType *iat =
IncompleteArrayTypes.FindNodeOrInsertPos(ID, insertPos))
return QualType(iat, 0);
// If the element type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field. We also have to pull
// qualifiers off the element type.
QualType canon;
// FIXME: Check below should look for qualifiers behind sugar.
if (!elementType.isCanonical() || elementType.hasLocalQualifiers()) {
SplitQualType canonSplit = getCanonicalType(elementType).split();
canon = getIncompleteArrayType(QualType(canonSplit.Ty, 0),
ASM, elementTypeQuals);
canon = getQualifiedType(canon, canonSplit.Quals);
// Get the new insert position for the node we care about.
IncompleteArrayType *existing =
IncompleteArrayTypes.FindNodeOrInsertPos(ID, insertPos);
assert(!existing && "Shouldn't be in the map!"); (void) existing;
}
auto *newType = new (*this, TypeAlignment)
IncompleteArrayType(elementType, canon, ASM, elementTypeQuals);
IncompleteArrayTypes.InsertNode(newType, insertPos);
Types.push_back(newType);
return QualType(newType, 0);
}
ASTContext::BuiltinVectorTypeInfo
ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
#define SVE_INT_ELTTY(BITS, ELTS, SIGNED, NUMVECTORS) \
{getIntTypeForBitwidth(BITS, SIGNED), llvm::ElementCount::getScalable(ELTS), \
NUMVECTORS};
#define SVE_ELTTY(ELTTY, ELTS, NUMVECTORS) \
{ELTTY, llvm::ElementCount::getScalable(ELTS), NUMVECTORS};
switch (Ty->getKind()) {
default:
llvm_unreachable("Unsupported builtin vector type");
case BuiltinType::SveInt8:
return SVE_INT_ELTTY(8, 16, true, 1);
case BuiltinType::SveUint8:
return SVE_INT_ELTTY(8, 16, false, 1);
case BuiltinType::SveInt8x2:
return SVE_INT_ELTTY(8, 16, true, 2);
case BuiltinType::SveUint8x2:
return SVE_INT_ELTTY(8, 16, false, 2);
case BuiltinType::SveInt8x3:
return SVE_INT_ELTTY(8, 16, true, 3);
case BuiltinType::SveUint8x3:
return SVE_INT_ELTTY(8, 16, false, 3);
case BuiltinType::SveInt8x4:
return SVE_INT_ELTTY(8, 16, true, 4);
case BuiltinType::SveUint8x4:
return SVE_INT_ELTTY(8, 16, false, 4);
case BuiltinType::SveInt16:
return SVE_INT_ELTTY(16, 8, true, 1);
case BuiltinType::SveUint16:
return SVE_INT_ELTTY(16, 8, false, 1);
case BuiltinType::SveInt16x2:
return SVE_INT_ELTTY(16, 8, true, 2);
case BuiltinType::SveUint16x2:
return SVE_INT_ELTTY(16, 8, false, 2);
case BuiltinType::SveInt16x3:
return SVE_INT_ELTTY(16, 8, true, 3);
case BuiltinType::SveUint16x3:
return SVE_INT_ELTTY(16, 8, false, 3);
case BuiltinType::SveInt16x4:
return SVE_INT_ELTTY(16, 8, true, 4);
case BuiltinType::SveUint16x4:
return SVE_INT_ELTTY(16, 8, false, 4);
case BuiltinType::SveInt32:
return SVE_INT_ELTTY(32, 4, true, 1);
case BuiltinType::SveUint32:
return SVE_INT_ELTTY(32, 4, false, 1);
case BuiltinType::SveInt32x2:
return SVE_INT_ELTTY(32, 4, true, 2);
case BuiltinType::SveUint32x2:
return SVE_INT_ELTTY(32, 4, false, 2);
case BuiltinType::SveInt32x3:
return SVE_INT_ELTTY(32, 4, true, 3);
case BuiltinType::SveUint32x3:
return SVE_INT_ELTTY(32, 4, false, 3);
case BuiltinType::SveInt32x4:
return SVE_INT_ELTTY(32, 4, true, 4);
case BuiltinType::SveUint32x4:
return SVE_INT_ELTTY(32, 4, false, 4);
case BuiltinType::SveInt64:
return SVE_INT_ELTTY(64, 2, true, 1);
case BuiltinType::SveUint64:
return SVE_INT_ELTTY(64, 2, false, 1);
case BuiltinType::SveInt64x2:
return SVE_INT_ELTTY(64, 2, true, 2);
case BuiltinType::SveUint64x2:
return SVE_INT_ELTTY(64, 2, false, 2);
case BuiltinType::SveInt64x3:
return SVE_INT_ELTTY(64, 2, true, 3);
case BuiltinType::SveUint64x3:
return SVE_INT_ELTTY(64, 2, false, 3);
case BuiltinType::SveInt64x4:
return SVE_INT_ELTTY(64, 2, true, 4);
case BuiltinType::SveUint64x4:
return SVE_INT_ELTTY(64, 2, false, 4);
case BuiltinType::SveBool:
return SVE_ELTTY(BoolTy, 16, 1);
case BuiltinType::SveFloat16:
return SVE_ELTTY(HalfTy, 8, 1);
case BuiltinType::SveFloat16x2:
return SVE_ELTTY(HalfTy, 8, 2);
case BuiltinType::SveFloat16x3:
return SVE_ELTTY(HalfTy, 8, 3);
case BuiltinType::SveFloat16x4:
return SVE_ELTTY(HalfTy, 8, 4);
case BuiltinType::SveFloat32:
return SVE_ELTTY(FloatTy, 4, 1);
case BuiltinType::SveFloat32x2:
return SVE_ELTTY(FloatTy, 4, 2);
case BuiltinType::SveFloat32x3:
return SVE_ELTTY(FloatTy, 4, 3);
case BuiltinType::SveFloat32x4:
return SVE_ELTTY(FloatTy, 4, 4);
case BuiltinType::SveFloat64:
return SVE_ELTTY(DoubleTy, 2, 1);
case BuiltinType::SveFloat64x2:
return SVE_ELTTY(DoubleTy, 2, 2);
case BuiltinType::SveFloat64x3:
return SVE_ELTTY(DoubleTy, 2, 3);
case BuiltinType::SveFloat64x4:
return SVE_ELTTY(DoubleTy, 2, 4);
case BuiltinType::SveBFloat16:
return SVE_ELTTY(BFloat16Ty, 8, 1);
case BuiltinType::SveBFloat16x2:
return SVE_ELTTY(BFloat16Ty, 8, 2);
case BuiltinType::SveBFloat16x3:
return SVE_ELTTY(BFloat16Ty, 8, 3);
case BuiltinType::SveBFloat16x4:
return SVE_ELTTY(BFloat16Ty, 8, 4);
#define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, \
IsSigned) \
case BuiltinType::Id: \
return {getIntTypeForBitwidth(ElBits, IsSigned), \
llvm::ElementCount::getScalable(NumEls), NF};
#define RVV_VECTOR_TYPE_FLOAT(Name, Id, SingletonId, NumEls, ElBits, NF) \
case BuiltinType::Id: \
return {ElBits == 16 ? Float16Ty : (ElBits == 32 ? FloatTy : DoubleTy), \
llvm::ElementCount::getScalable(NumEls), NF};
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \
case BuiltinType::Id: \
return {BoolTy, llvm::ElementCount::getScalable(NumEls), 1};
#include "clang/Basic/RISCVVTypes.def"
}
}
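// e.g. (illustrative): SveInt32 maps to
//   { 32-bit signed int, ElementCount::getScalable(4), NumVectors = 1 }
// and SveUint8x4 to
//   { 8-bit unsigned int, ElementCount::getScalable(16), NumVectors = 4 }.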
/// getScalableVectorType - Return the unique reference to a scalable vector
/// type of the specified element type and size. The element type must be a
/// built-in type; a null QualType is returned when no builtin matches.
QualType ASTContext::getScalableVectorType(QualType EltTy,
unsigned NumElts) const {
if (Target->hasAArch64SVETypes()) {
uint64_t EltTySize = getTypeSize(EltTy);
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \
IsSigned, IsFP, IsBF) \
if (!EltTy->isBooleanType() && \
((EltTy->hasIntegerRepresentation() && \
EltTy->hasSignedIntegerRepresentation() == IsSigned) || \
(EltTy->hasFloatingRepresentation() && !EltTy->isBFloat16Type() && \
IsFP && !IsBF) || \
(EltTy->hasFloatingRepresentation() && EltTy->isBFloat16Type() && \
IsBF && !IsFP)) && \
EltTySize == ElBits && NumElts == NumEls) { \
return SingletonId; \
}
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \
if (EltTy->isBooleanType() && NumElts == NumEls) \
return SingletonId;
#include "clang/Basic/AArch64SVEACLETypes.def"
} else if (Target->hasRISCVVTypes()) {
uint64_t EltTySize = getTypeSize(EltTy);
#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, \
IsFP) \
if (!EltTy->isBooleanType() && \
((EltTy->hasIntegerRepresentation() && \
EltTy->hasSignedIntegerRepresentation() == IsSigned) || \
(EltTy->hasFloatingRepresentation() && IsFP)) && \
EltTySize == ElBits && NumElts == NumEls) \
return SingletonId;
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \
if (EltTy->isBooleanType() && NumElts == NumEls) \
return SingletonId;
#include "clang/Basic/RISCVVTypes.def"
}
return QualType();
}
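// e.g. (illustrative): on an AArch64 target with SVE types,
// getScalableVectorType(UnsignedIntTy, 4) yields __SVUint32_t, and a boolean
// element with 16 lanes yields __SVBool_t; when nothing matches (or the
// target has no scalable types) the null QualType() signals failure.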
/// getVectorType - Return the unique reference to a vector type of
/// the specified element type and size. The element type must be a built-in
/// type or a _BitInt with a byte-sized power-of-two width.
QualType ASTContext::getVectorType(QualType vecType, unsigned NumElts,
VectorType::VectorKind VecKind) const {
assert(vecType->isBuiltinType() ||
(vecType->isBitIntType() &&
// Only support _BitInt elements with byte-sized power of 2 NumBits.
llvm::isPowerOf2_32(vecType->getAs<BitIntType>()->getNumBits()) &&
vecType->getAs<BitIntType>()->getNumBits() >= 8));
// Check if we've already instantiated a vector of this type.
llvm::FoldingSetNodeID ID;
VectorType::Profile(ID, vecType, NumElts, Type::Vector, VecKind);
void *InsertPos = nullptr;
if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(VTP, 0);
// If the element type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!vecType.isCanonical()) {
Canonical = getVectorType(getCanonicalType(vecType), NumElts, VecKind);
// Get the new insert position for the node we care about.
VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
VectorType(vecType, NumElts, Canonical, VecKind);
VectorTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType
ASTContext::getDependentVectorType(QualType VecType, Expr *SizeExpr,
SourceLocation AttrLoc,
VectorType::VectorKind VecKind) const {
llvm::FoldingSetNodeID ID;
DependentVectorType::Profile(ID, *this, getCanonicalType(VecType), SizeExpr,
VecKind);
void *InsertPos = nullptr;
DependentVectorType *Canon =
DependentVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
DependentVectorType *New;
if (Canon) {
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, QualType(Canon, 0), SizeExpr, AttrLoc, VecKind);
} else {
QualType CanonVecTy = getCanonicalType(VecType);
if (CanonVecTy == VecType) {
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, QualType(), SizeExpr, AttrLoc, VecKind);
DependentVectorType *CanonCheck =
DependentVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck &&
"Dependent-sized vector_size canonical type broken");
(void)CanonCheck;
DependentVectorTypes.InsertNode(New, InsertPos);
} else {
QualType CanonTy = getDependentVectorType(CanonVecTy, SizeExpr,
SourceLocation(), VecKind);
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, CanonTy, SizeExpr, AttrLoc, VecKind);
}
}
Types.push_back(New);
return QualType(New, 0);
}
/// getExtVectorType - Return the unique reference to an extended vector type
/// of the specified element type and size. The element type must be a
/// built-in type, a dependent type, or a suitable _BitInt.
QualType ASTContext::getExtVectorType(QualType vecType,
unsigned NumElts) const {
assert(vecType->isBuiltinType() || vecType->isDependentType() ||
(vecType->isBitIntType() &&
// Only support _BitInt elements with byte-sized power of 2 NumBits.
llvm::isPowerOf2_32(vecType->getAs<BitIntType>()->getNumBits()) &&
vecType->getAs<BitIntType>()->getNumBits() >= 8));
// Check if we've already instantiated a vector of this type.
llvm::FoldingSetNodeID ID;
VectorType::Profile(ID, vecType, NumElts, Type::ExtVector,
VectorType::GenericVector);
void *InsertPos = nullptr;
if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(VTP, 0);
// If the element type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!vecType.isCanonical()) {
Canonical = getExtVectorType(getCanonicalType(vecType), NumElts);
// Get the new insert position for the node we care about.
VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
ExtVectorType(vecType, NumElts, Canonical);
VectorTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType
ASTContext::getDependentSizedExtVectorType(QualType vecType,
Expr *SizeExpr,
SourceLocation AttrLoc) const {
llvm::FoldingSetNodeID ID;
DependentSizedExtVectorType::Profile(ID, *this, getCanonicalType(vecType),
SizeExpr);
void *InsertPos = nullptr;
DependentSizedExtVectorType *Canon
= DependentSizedExtVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
DependentSizedExtVectorType *New;
if (Canon) {
// We already have a canonical version of this array type; use it as
// the canonical type for a newly-built type.
New = new (*this, TypeAlignment)
DependentSizedExtVectorType(*this, vecType, QualType(Canon, 0),
SizeExpr, AttrLoc);
} else {
QualType CanonVecTy = getCanonicalType(vecType);
if (CanonVecTy == vecType) {
New = new (*this, TypeAlignment)
DependentSizedExtVectorType(*this, vecType, QualType(), SizeExpr,
AttrLoc);
DependentSizedExtVectorType *CanonCheck
= DependentSizedExtVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck && "Dependent-sized ext_vector canonical type broken");
(void)CanonCheck;
DependentSizedExtVectorTypes.InsertNode(New, InsertPos);
} else {
QualType CanonExtTy = getDependentSizedExtVectorType(CanonVecTy, SizeExpr,
SourceLocation());
New = new (*this, TypeAlignment) DependentSizedExtVectorType(
*this, vecType, CanonExtTy, SizeExpr, AttrLoc);
}
}
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getConstantMatrixType(QualType ElementTy, unsigned NumRows,
unsigned NumColumns) const {
llvm::FoldingSetNodeID ID;
ConstantMatrixType::Profile(ID, ElementTy, NumRows, NumColumns,
Type::ConstantMatrix);
assert(MatrixType::isValidElementType(ElementTy) &&
"need a valid element type");
assert(ConstantMatrixType::isDimensionValid(NumRows) &&
ConstantMatrixType::isDimensionValid(NumColumns) &&
"need valid matrix dimensions");
void *InsertPos = nullptr;
if (ConstantMatrixType *MTP = MatrixTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(MTP, 0);
QualType Canonical;
if (!ElementTy.isCanonical()) {
Canonical =
getConstantMatrixType(getCanonicalType(ElementTy), NumRows, NumColumns);
ConstantMatrixType *NewIP = MatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Matrix type shouldn't already exist in the map");
(void)NewIP;
}
auto *New = new (*this, TypeAlignment)
ConstantMatrixType(ElementTy, NumRows, NumColumns, Canonical);
MatrixTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
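// Usage sketch (illustrative only, assuming an ASTContext &Ctx): the type
// written 'double __attribute__((matrix_type(4, 4)))' is built as:
//
//   QualType M = Ctx.getConstantMatrixType(Ctx.DoubleTy, /*NumRows=*/4,
//                                          /*NumColumns=*/4);
//   assert(Ctx.getConstantMatrixType(Ctx.DoubleTy, 4, 4) == M); // uniqued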
QualType ASTContext::getDependentSizedMatrixType(QualType ElementTy,
Expr *RowExpr,
Expr *ColumnExpr,
SourceLocation AttrLoc) const {
QualType CanonElementTy = getCanonicalType(ElementTy);
llvm::FoldingSetNodeID ID;
DependentSizedMatrixType::Profile(ID, *this, CanonElementTy, RowExpr,
ColumnExpr);
void *InsertPos = nullptr;
DependentSizedMatrixType *Canon =
DependentSizedMatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
Canon = new (*this, TypeAlignment) DependentSizedMatrixType(
*this, CanonElementTy, QualType(), RowExpr, ColumnExpr, AttrLoc);
#ifndef NDEBUG
DependentSizedMatrixType *CanonCheck =
DependentSizedMatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck && "Dependent-sized matrix canonical type broken");
#endif
DependentSizedMatrixTypes.InsertNode(Canon, InsertPos);
Types.push_back(Canon);
}
  // We now have a canonical version of the matrix type.
  //
  // If it exactly matches the requested type, use it directly.
  if (Canon->getElementType() == ElementTy && Canon->getRowExpr() == RowExpr &&
      Canon->getColumnExpr() == ColumnExpr)
    return QualType(Canon, 0);
// Use Canon as the canonical type for newly-built type.
DependentSizedMatrixType *New = new (*this, TypeAlignment)
DependentSizedMatrixType(*this, ElementTy, QualType(Canon, 0), RowExpr,
ColumnExpr, AttrLoc);
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getDependentAddressSpaceType(QualType PointeeType,
Expr *AddrSpaceExpr,
SourceLocation AttrLoc) const {
assert(AddrSpaceExpr->isInstantiationDependent());
QualType canonPointeeType = getCanonicalType(PointeeType);
void *insertPos = nullptr;
llvm::FoldingSetNodeID ID;
DependentAddressSpaceType::Profile(ID, *this, canonPointeeType,
AddrSpaceExpr);
DependentAddressSpaceType *canonTy =
DependentAddressSpaceTypes.FindNodeOrInsertPos(ID, insertPos);
if (!canonTy) {
canonTy = new (*this, TypeAlignment)
DependentAddressSpaceType(*this, canonPointeeType,
QualType(), AddrSpaceExpr, AttrLoc);
DependentAddressSpaceTypes.InsertNode(canonTy, insertPos);
Types.push_back(canonTy);
}
if (canonPointeeType == PointeeType &&
canonTy->getAddrSpaceExpr() == AddrSpaceExpr)
return QualType(canonTy, 0);
auto *sugaredType
= new (*this, TypeAlignment)
DependentAddressSpaceType(*this, PointeeType, QualType(canonTy, 0),
AddrSpaceExpr, AttrLoc);
Types.push_back(sugaredType);
return QualType(sugaredType, 0);
}
/// Determine whether \p T is canonical as the result type of a function.
static bool isCanonicalResultType(QualType T) {
return T.isCanonical() &&
(T.getObjCLifetime() == Qualifiers::OCL_None ||
T.getObjCLifetime() == Qualifiers::OCL_ExplicitNone);
}
/// getFunctionNoProtoType - Return a K&R style C function type like 'int()'.
QualType
ASTContext::getFunctionNoProtoType(QualType ResultTy,
const FunctionType::ExtInfo &Info) const {
// FIXME: This assertion cannot be enabled (yet) because the ObjC rewriter
// functionality creates a function without a prototype regardless of
// language mode (so it makes them even in C++). Once the rewriter has been
// fixed, this assertion can be enabled again.
//assert(!LangOpts.requiresStrictPrototypes() &&
// "strict prototypes are disabled");
// Unique functions, to guarantee there is only one function of a particular
// structure.
llvm::FoldingSetNodeID ID;
FunctionNoProtoType::Profile(ID, ResultTy, Info);
void *InsertPos = nullptr;
if (FunctionNoProtoType *FT =
FunctionNoProtoTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(FT, 0);
QualType Canonical;
if (!isCanonicalResultType(ResultTy)) {
Canonical =
getFunctionNoProtoType(getCanonicalFunctionResultType(ResultTy), Info);
// Get the new insert position for the node we care about.
FunctionNoProtoType *NewIP =
FunctionNoProtoTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
FunctionNoProtoType(ResultTy, Canonical, Info);
Types.push_back(New);
FunctionNoProtoTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
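// Usage sketch (illustrative only, assuming an ASTContext &Ctx): the K&R
// type 'int()' -- a function returning int with no parameter info -- is:
//
//   QualType KnR = Ctx.getFunctionNoProtoType(Ctx.IntTy);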
CanQualType
ASTContext::getCanonicalFunctionResultType(QualType ResultType) const {
CanQualType CanResultType = getCanonicalType(ResultType);
// Canonical result types do not have ARC lifetime qualifiers.
if (CanResultType.getQualifiers().hasObjCLifetime()) {
Qualifiers Qs = CanResultType.getQualifiers();
Qs.removeObjCLifetime();
return CanQualType::CreateUnsafe(
getQualifiedType(CanResultType.getUnqualifiedType(), Qs));
}
return CanResultType;
}
static bool isCanonicalExceptionSpecification(
const FunctionProtoType::ExceptionSpecInfo &ESI, bool NoexceptInType) {
if (ESI.Type == EST_None)
return true;
if (!NoexceptInType)
return false;
// C++17 onwards: exception specification is part of the type, as a simple
// boolean "can this function type throw".
if (ESI.Type == EST_BasicNoexcept)
return true;
// A noexcept(expr) specification is (possibly) canonical if expr is
// value-dependent.
if (ESI.Type == EST_DependentNoexcept)
return true;
// A dynamic exception specification is canonical if it only contains pack
// expansions (so we can't tell whether it's non-throwing) and all its
// contained types are canonical.
if (ESI.Type == EST_Dynamic) {
bool AnyPackExpansions = false;
for (QualType ET : ESI.Exceptions) {
if (!ET.isCanonical())
return false;
if (ET->getAs<PackExpansionType>())
AnyPackExpansions = true;
}
return AnyPackExpansions;
}
return false;
}
QualType ASTContext::getFunctionTypeInternal(
QualType ResultTy, ArrayRef<QualType> ArgArray,
const FunctionProtoType::ExtProtoInfo &EPI, bool OnlyWantCanonical) const {
size_t NumArgs = ArgArray.size();
// Unique functions, to guarantee there is only one function of a particular
// structure.
llvm::FoldingSetNodeID ID;
FunctionProtoType::Profile(ID, ResultTy, ArgArray.begin(), NumArgs, EPI,
*this, true);
QualType Canonical;
bool Unique = false;
void *InsertPos = nullptr;
if (FunctionProtoType *FPT =
FunctionProtoTypes.FindNodeOrInsertPos(ID, InsertPos)) {
QualType Existing = QualType(FPT, 0);
// If we find a pre-existing equivalent FunctionProtoType, we can just reuse
// it so long as our exception specification doesn't contain a dependent
// noexcept expression, or we're just looking for a canonical type.
// Otherwise, we're going to need to create a type
// sugar node to hold the concrete expression.
if (OnlyWantCanonical || !isComputedNoexcept(EPI.ExceptionSpec.Type) ||
EPI.ExceptionSpec.NoexceptExpr == FPT->getNoexceptExpr())
return Existing;
// We need a new type sugar node for this one, to hold the new noexcept
// expression. We do no canonicalization here, but that's OK since we don't
// expect to see the same noexcept expression much more than once.
Canonical = getCanonicalType(Existing);
Unique = true;
}
bool NoexceptInType = getLangOpts().CPlusPlus17;
bool IsCanonicalExceptionSpec =
isCanonicalExceptionSpecification(EPI.ExceptionSpec, NoexceptInType);
// Determine whether the type being created is already canonical or not.
bool isCanonical = !Unique && IsCanonicalExceptionSpec &&
isCanonicalResultType(ResultTy) && !EPI.HasTrailingReturn;
for (unsigned i = 0; i != NumArgs && isCanonical; ++i)
if (!ArgArray[i].isCanonicalAsParam())
isCanonical = false;
if (OnlyWantCanonical)
assert(isCanonical &&
"given non-canonical parameters constructing canonical type");
// If this type isn't canonical, get the canonical version of it if we don't
// already have it. The exception spec is only partially part of the
// canonical type, and only in C++17 onwards.
if (!isCanonical && Canonical.isNull()) {
SmallVector<QualType, 16> CanonicalArgs;
CanonicalArgs.reserve(NumArgs);
for (unsigned i = 0; i != NumArgs; ++i)
CanonicalArgs.push_back(getCanonicalParamType(ArgArray[i]));
llvm::SmallVector<QualType, 8> ExceptionTypeStorage;
FunctionProtoType::ExtProtoInfo CanonicalEPI = EPI;
CanonicalEPI.HasTrailingReturn = false;
if (IsCanonicalExceptionSpec) {
// Exception spec is already OK.
} else if (NoexceptInType) {
switch (EPI.ExceptionSpec.Type) {
case EST_Unparsed: case EST_Unevaluated: case EST_Uninstantiated:
// We don't know yet. It shouldn't matter what we pick here; no-one
// should ever look at this.
[[fallthrough]];
case EST_None: case EST_MSAny: case EST_NoexceptFalse:
CanonicalEPI.ExceptionSpec.Type = EST_None;
break;
// A dynamic exception specification is almost always "not noexcept",
// with the exception that a pack expansion might expand to no types.
case EST_Dynamic: {
bool AnyPacks = false;
for (QualType ET : EPI.ExceptionSpec.Exceptions) {
if (ET->getAs<PackExpansionType>())
AnyPacks = true;
ExceptionTypeStorage.push_back(getCanonicalType(ET));
}
if (!AnyPacks)
CanonicalEPI.ExceptionSpec.Type = EST_None;
else {
CanonicalEPI.ExceptionSpec.Type = EST_Dynamic;
CanonicalEPI.ExceptionSpec.Exceptions = ExceptionTypeStorage;
}
break;
}
case EST_DynamicNone:
case EST_BasicNoexcept:
case EST_NoexceptTrue:
case EST_NoThrow:
CanonicalEPI.ExceptionSpec.Type = EST_BasicNoexcept;
break;
case EST_DependentNoexcept:
llvm_unreachable("dependent noexcept is already canonical");
}
} else {
CanonicalEPI.ExceptionSpec = FunctionProtoType::ExceptionSpecInfo();
}
// Adjust the canonical function result type.
CanQualType CanResultTy = getCanonicalFunctionResultType(ResultTy);
Canonical =
getFunctionTypeInternal(CanResultTy, CanonicalArgs, CanonicalEPI, true);
// Get the new insert position for the node we care about.
FunctionProtoType *NewIP =
FunctionProtoTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
// Compute the needed size to hold this FunctionProtoType and the
// various trailing objects.
auto ESH = FunctionProtoType::getExceptionSpecSize(
EPI.ExceptionSpec.Type, EPI.ExceptionSpec.Exceptions.size());
size_t Size = FunctionProtoType::totalSizeToAlloc<
QualType, SourceLocation, FunctionType::FunctionTypeExtraBitfields,
FunctionType::ExceptionType, Expr *, FunctionDecl *,
FunctionProtoType::ExtParameterInfo, Qualifiers>(
NumArgs, EPI.Variadic, EPI.requiresFunctionProtoTypeExtraBitfields(),
ESH.NumExceptionType, ESH.NumExprPtr, ESH.NumFunctionDeclPtr,
EPI.ExtParameterInfos ? NumArgs : 0,
EPI.TypeQuals.hasNonFastQualifiers() ? 1 : 0);
auto *FTP = (FunctionProtoType *)Allocate(Size, TypeAlignment);
FunctionProtoType::ExtProtoInfo newEPI = EPI;
new (FTP) FunctionProtoType(ResultTy, ArgArray, Canonical, newEPI);
Types.push_back(FTP);
if (!Unique)
FunctionProtoTypes.InsertNode(FTP, InsertPos);
return QualType(FTP, 0);
}
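// Usage sketch (illustrative only, assuming an ASTContext &Ctx): callers go
// through the public getFunctionType wrapper; e.g. 'int (int, char)' with a
// default-constructed ExtProtoInfo:
//
//   QualType Params[] = {Ctx.IntTy, Ctx.CharTy};
//   QualType Fn = Ctx.getFunctionType(Ctx.IntTy, Params,
//                                     FunctionProtoType::ExtProtoInfo());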
QualType ASTContext::getPipeType(QualType T, bool ReadOnly) const {
llvm::FoldingSetNodeID ID;
PipeType::Profile(ID, T, ReadOnly);
void *InsertPos = nullptr;
if (PipeType *PT = PipeTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pipe element type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getPipeType(getCanonicalType(T), ReadOnly);
// Get the new insert position for the node we care about.
PipeType *NewIP = PipeTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!");
(void)NewIP;
}
auto *New = new (*this, TypeAlignment) PipeType(T, Canonical, ReadOnly);
Types.push_back(New);
PipeTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
QualType ASTContext::adjustStringLiteralBaseType(QualType Ty) const {
// OpenCL v1.1 s6.5.3: a string literal is in the constant address space.
return LangOpts.OpenCL ? getAddrSpaceQualType(Ty, LangAS::opencl_constant)
: Ty;
}
QualType ASTContext::getReadPipeType(QualType T) const {
return getPipeType(T, true);
}
QualType ASTContext::getWritePipeType(QualType T) const {
return getPipeType(T, false);
}
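// Usage sketch (illustrative only, assuming an ASTContext &Ctx): the OpenCL
// types 'read_only pipe int' and 'write_only pipe int' are distinct nodes
// because the access direction is part of the uniquing key:
//
//   QualType RP = Ctx.getReadPipeType(Ctx.IntTy);
//   QualType WP = Ctx.getWritePipeType(Ctx.IntTy);
//   assert(RP != WP);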
QualType ASTContext::getBitIntType(bool IsUnsigned, unsigned NumBits) const {
llvm::FoldingSetNodeID ID;
BitIntType::Profile(ID, IsUnsigned, NumBits);
void *InsertPos = nullptr;
if (BitIntType *EIT = BitIntTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(EIT, 0);
auto *New = new (*this, TypeAlignment) BitIntType(IsUnsigned, NumBits);
BitIntTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
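// Usage sketch (illustrative only, assuming an ASTContext &Ctx):
// '_BitInt(37)' and 'unsigned _BitInt(37)' are separate uniqued nodes:
//
//   QualType S37 = Ctx.getBitIntType(/*IsUnsigned=*/false, /*NumBits=*/37);
//   QualType U37 = Ctx.getBitIntType(/*IsUnsigned=*/true, /*NumBits=*/37);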
QualType ASTContext::getDependentBitIntType(bool IsUnsigned,
Expr *NumBitsExpr) const {
assert(NumBitsExpr->isInstantiationDependent() && "Only good for dependent");
llvm::FoldingSetNodeID ID;
DependentBitIntType::Profile(ID, *this, IsUnsigned, NumBitsExpr);
void *InsertPos = nullptr;
if (DependentBitIntType *Existing =
DependentBitIntTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(Existing, 0);
auto *New = new (*this, TypeAlignment)
DependentBitIntType(*this, IsUnsigned, NumBitsExpr);
DependentBitIntTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
#ifndef NDEBUG
static bool NeedsInjectedClassNameType(const RecordDecl *D) {
if (!isa<CXXRecordDecl>(D)) return false;
const auto *RD = cast<CXXRecordDecl>(D);
if (isa<ClassTemplatePartialSpecializationDecl>(RD))
return true;
if (RD->getDescribedClassTemplate() &&
!isa<ClassTemplateSpecializationDecl>(RD))
return true;
return false;
}
#endif
/// getInjectedClassNameType - Return the unique reference to the
/// injected class name type for the specified templated declaration.
QualType ASTContext::getInjectedClassNameType(CXXRecordDecl *Decl,
QualType TST) const {
assert(NeedsInjectedClassNameType(Decl));
if (Decl->TypeForDecl) {
assert(isa<InjectedClassNameType>(Decl->TypeForDecl));
} else if (CXXRecordDecl *PrevDecl = Decl->getPreviousDecl()) {
assert(PrevDecl->TypeForDecl && "previous declaration has no type");
Decl->TypeForDecl = PrevDecl->TypeForDecl;
assert(isa<InjectedClassNameType>(Decl->TypeForDecl));
} else {
Type *newType =
new (*this, TypeAlignment) InjectedClassNameType(Decl, TST);
Decl->TypeForDecl = newType;
Types.push_back(newType);
}
return QualType(Decl->TypeForDecl, 0);
}
/// getTypeDeclType - Return the unique reference to the type for the
/// specified type declaration.
QualType ASTContext::getTypeDeclTypeSlow(const TypeDecl *Decl) const {
assert(Decl && "Passed null for Decl param");
assert(!Decl->TypeForDecl && "TypeForDecl present in slow case");
if (const auto *Typedef = dyn_cast<TypedefNameDecl>(Decl))
return getTypedefType(Typedef);
assert(!isa<TemplateTypeParmDecl>(Decl) &&
"Template type parameter types are always available.");
if (const auto *Record = dyn_cast<RecordDecl>(Decl)) {
assert(Record->isFirstDecl() && "struct/union has previous declaration");
assert(!NeedsInjectedClassNameType(Record));
return getRecordType(Record);
} else if (const auto *Enum = dyn_cast<EnumDecl>(Decl)) {
assert(Enum->isFirstDecl() && "enum has previous declaration");
return getEnumType(Enum);
} else if (const auto *Using = dyn_cast<UnresolvedUsingTypenameDecl>(Decl)) {
return getUnresolvedUsingType(Using);
} else
llvm_unreachable("TypeDecl without a type?");
return QualType(Decl->TypeForDecl, 0);
}
/// getTypedefType - Return the unique reference to the type for the
/// specified typedef name decl.
QualType ASTContext::getTypedefType(const TypedefNameDecl *Decl,
QualType Underlying) const {
if (!Decl->TypeForDecl) {
if (Underlying.isNull())
Underlying = Decl->getUnderlyingType();
auto *NewType = new (*this, TypeAlignment) TypedefType(
Type::Typedef, Decl, QualType(), getCanonicalType(Underlying));
Decl->TypeForDecl = NewType;
Types.push_back(NewType);
return QualType(NewType, 0);
}
if (Underlying.isNull() || Decl->getUnderlyingType() == Underlying)
return QualType(Decl->TypeForDecl, 0);
assert(hasSameType(Decl->getUnderlyingType(), Underlying));
llvm::FoldingSetNodeID ID;
TypedefType::Profile(ID, Decl, Underlying);
void *InsertPos = nullptr;
if (TypedefType *T = TypedefTypes.FindNodeOrInsertPos(ID, InsertPos)) {
assert(!T->typeMatchesDecl() &&
"non-divergent case should be handled with TypeDecl");
return QualType(T, 0);
}
void *Mem =
Allocate(TypedefType::totalSizeToAlloc<QualType>(true), TypeAlignment);
auto *NewType = new (Mem) TypedefType(Type::Typedef, Decl, Underlying,
getCanonicalType(Underlying));
TypedefTypes.InsertNode(NewType, InsertPos);
Types.push_back(NewType);
return QualType(NewType, 0);
}
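// Usage sketch (illustrative only, assuming an ASTContext &Ctx and a
// TypedefNameDecl *TD): the common, non-divergent case takes the fast path
// above and returns the cached TypeForDecl:
//
//   QualType T1 = Ctx.getTypedefType(TD);
//   QualType T2 = Ctx.getTypedefType(TD, TD->getUnderlyingType());
//   assert(T1 == T2);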
QualType ASTContext::getUsingType(const UsingShadowDecl *Found,
QualType Underlying) const {
llvm::FoldingSetNodeID ID;
UsingType::Profile(ID, Found, Underlying);
void *InsertPos = nullptr;
if (UsingType *T = UsingTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(T, 0);
const Type *TypeForDecl =
cast<TypeDecl>(Found->getTargetDecl())->getTypeForDecl();
assert(!Underlying.hasLocalQualifiers());
QualType Canon = Underlying->getCanonicalTypeInternal();
assert(TypeForDecl->getCanonicalTypeInternal() == Canon);
if (Underlying.getTypePtr() == TypeForDecl)
Underlying = QualType();
void *Mem =
Allocate(UsingType::totalSizeToAlloc<QualType>(!Underlying.isNull()),
TypeAlignment);
UsingType *NewType = new (Mem) UsingType(Found, Underlying, Canon);
Types.push_back(NewType);
UsingTypes.InsertNode(NewType, InsertPos);
return QualType(NewType, 0);
}
QualType ASTContext::getRecordType(const RecordDecl *Decl) const {
if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0);
if (const RecordDecl *PrevDecl = Decl->getPreviousDecl())
if (PrevDecl->TypeForDecl)
return QualType(Decl->TypeForDecl = PrevDecl->TypeForDecl, 0);
auto *newType = new (*this, TypeAlignment) RecordType(Decl);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getEnumType(const EnumDecl *Decl) const {
if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0);
if (const EnumDecl *PrevDecl = Decl->getPreviousDecl())
if (PrevDecl->TypeForDecl)
return QualType(Decl->TypeForDecl = PrevDecl->TypeForDecl, 0);
auto *newType = new (*this, TypeAlignment) EnumType(Decl);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getUnresolvedUsingType(
const UnresolvedUsingTypenameDecl *Decl) const {
if (Decl->TypeForDecl)
return QualType(Decl->TypeForDecl, 0);
if (const UnresolvedUsingTypenameDecl *CanonicalDecl =
Decl->getCanonicalDecl())
if (CanonicalDecl->TypeForDecl)
return QualType(Decl->TypeForDecl = CanonicalDecl->TypeForDecl, 0);
Type *newType = new (*this, TypeAlignment) UnresolvedUsingType(Decl);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getAttributedType(attr::Kind attrKind,
QualType modifiedType,
QualType equivalentType) const {
llvm::FoldingSetNodeID id;
AttributedType::Profile(id, attrKind, modifiedType, equivalentType);
void *insertPos = nullptr;
AttributedType *type = AttributedTypes.FindNodeOrInsertPos(id, insertPos);
if (type) return QualType(type, 0);
QualType canon = getCanonicalType(equivalentType);
type = new (*this, TypeAlignment)
AttributedType(canon, attrKind, modifiedType, equivalentType);
Types.push_back(type);
AttributedTypes.InsertNode(type, insertPos);
return QualType(type, 0);
}
QualType ASTContext::getBTFTagAttributedType(const BTFTypeTagAttr *BTFAttr,
QualType Wrapped) {
llvm::FoldingSetNodeID ID;
BTFTagAttributedType::Profile(ID, Wrapped, BTFAttr);
void *InsertPos = nullptr;
BTFTagAttributedType *Ty =
BTFTagAttributedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (Ty)
return QualType(Ty, 0);
QualType Canon = getCanonicalType(Wrapped);
Ty = new (*this, TypeAlignment) BTFTagAttributedType(Canon, Wrapped, BTFAttr);
Types.push_back(Ty);
BTFTagAttributedTypes.InsertNode(Ty, InsertPos);
return QualType(Ty, 0);
}
/// Retrieve a substitution-result type.
QualType ASTContext::getSubstTemplateTypeParmType(
QualType Replacement, Decl *AssociatedDecl, unsigned Index,
std::optional<unsigned> PackIndex) const {
llvm::FoldingSetNodeID ID;
SubstTemplateTypeParmType::Profile(ID, Replacement, AssociatedDecl, Index,
PackIndex);
void *InsertPos = nullptr;
SubstTemplateTypeParmType *SubstParm =
SubstTemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!SubstParm) {
void *Mem = Allocate(SubstTemplateTypeParmType::totalSizeToAlloc<QualType>(
!Replacement.isCanonical()),
TypeAlignment);
SubstParm = new (Mem) SubstTemplateTypeParmType(Replacement, AssociatedDecl,
Index, PackIndex);
Types.push_back(SubstParm);
SubstTemplateTypeParmTypes.InsertNode(SubstParm, InsertPos);
}
return QualType(SubstParm, 0);
}
/// Retrieve a substituted template type parameter pack type.
QualType
ASTContext::getSubstTemplateTypeParmPackType(Decl *AssociatedDecl,
unsigned Index, bool Final,
const TemplateArgument &ArgPack) {
#ifndef NDEBUG
for (const auto &P : ArgPack.pack_elements())
assert(P.getKind() == TemplateArgument::Type && "Pack contains a non-type");
#endif
llvm::FoldingSetNodeID ID;
SubstTemplateTypeParmPackType::Profile(ID, AssociatedDecl, Index, Final,
ArgPack);
void *InsertPos = nullptr;
if (SubstTemplateTypeParmPackType *SubstParm =
SubstTemplateTypeParmPackTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(SubstParm, 0);
QualType Canon;
{
TemplateArgument CanonArgPack = getCanonicalTemplateArgument(ArgPack);
if (!AssociatedDecl->isCanonicalDecl() ||
!CanonArgPack.structurallyEquals(ArgPack)) {
Canon = getSubstTemplateTypeParmPackType(
AssociatedDecl->getCanonicalDecl(), Index, Final, CanonArgPack);
[[maybe_unused]] const auto *Nothing =
SubstTemplateTypeParmPackTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!Nothing);
}
}
auto *SubstParm = new (*this, TypeAlignment) SubstTemplateTypeParmPackType(
Canon, AssociatedDecl, Index, Final, ArgPack);
Types.push_back(SubstParm);
SubstTemplateTypeParmPackTypes.InsertNode(SubstParm, InsertPos);
return QualType(SubstParm, 0);
}
/// Retrieve the template type parameter type for a template
/// parameter or parameter pack with the given depth, index, and (optionally)
/// name.
QualType ASTContext::getTemplateTypeParmType(unsigned Depth, unsigned Index,
bool ParameterPack,
TemplateTypeParmDecl *TTPDecl) const {
llvm::FoldingSetNodeID ID;
TemplateTypeParmType::Profile(ID, Depth, Index, ParameterPack, TTPDecl);
void *InsertPos = nullptr;
TemplateTypeParmType *TypeParm
= TemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
if (TypeParm)
return QualType(TypeParm, 0);
if (TTPDecl) {
QualType Canon = getTemplateTypeParmType(Depth, Index, ParameterPack);
TypeParm = new (*this, TypeAlignment) TemplateTypeParmType(TTPDecl, Canon);
TemplateTypeParmType *TypeCheck
= TemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!TypeCheck && "Template type parameter canonical type broken");
(void)TypeCheck;
} else
TypeParm = new (*this, TypeAlignment)
TemplateTypeParmType(Depth, Index, ParameterPack);
Types.push_back(TypeParm);
TemplateTypeParmTypes.InsertNode(TypeParm, InsertPos);
return QualType(TypeParm, 0);
}
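// Usage sketch (illustrative only, assuming an ASTContext &Ctx): the
// canonical type of the 'T' in 'template <class T>' is identified purely by
// depth and index, with no declaration attached:
//
//   QualType CanonT = Ctx.getTemplateTypeParmType(/*Depth=*/0, /*Index=*/0,
//                                                 /*ParameterPack=*/false);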
TypeSourceInfo *
ASTContext::getTemplateSpecializationTypeInfo(TemplateName Name,
SourceLocation NameLoc,
const TemplateArgumentListInfo &Args,
QualType Underlying) const {
assert(!Name.getAsDependentTemplateName() &&
"No dependent template names here!");
QualType TST =
getTemplateSpecializationType(Name, Args.arguments(), Underlying);
TypeSourceInfo *DI = CreateTypeSourceInfo(TST);
TemplateSpecializationTypeLoc TL =
DI->getTypeLoc().castAs<TemplateSpecializationTypeLoc>();
TL.setTemplateKeywordLoc(SourceLocation());
TL.setTemplateNameLoc(NameLoc);
TL.setLAngleLoc(Args.getLAngleLoc());
TL.setRAngleLoc(Args.getRAngleLoc());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
TL.setArgLocInfo(i, Args[i].getLocInfo());
return DI;
}
QualType
ASTContext::getTemplateSpecializationType(TemplateName Template,
ArrayRef<TemplateArgumentLoc> Args,
QualType Underlying) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
SmallVector<TemplateArgument, 4> ArgVec;
ArgVec.reserve(Args.size());
for (const TemplateArgumentLoc &Arg : Args)
ArgVec.push_back(Arg.getArgument());
return getTemplateSpecializationType(Template, ArgVec, Underlying);
}
#ifndef NDEBUG
static bool hasAnyPackExpansions(ArrayRef<TemplateArgument> Args) {
  for (const TemplateArgument &Arg : Args)
    if (Arg.isPackExpansion())
      return true;
  return false;
}
#endif
QualType
ASTContext::getTemplateSpecializationType(TemplateName Template,
ArrayRef<TemplateArgument> Args,
QualType Underlying) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
// Look through qualified template names.
if (QualifiedTemplateName *QTN = Template.getAsQualifiedTemplateName())
Template = QTN->getUnderlyingTemplate();
const auto *TD = Template.getAsTemplateDecl();
bool IsTypeAlias = TD && TD->isTypeAlias();
QualType CanonType;
if (!Underlying.isNull())
CanonType = getCanonicalType(Underlying);
else {
// We can get here with an alias template when the specialization contains
// a pack expansion that does not match up with a parameter pack.
assert((!IsTypeAlias || hasAnyPackExpansions(Args)) &&
"Caller must compute aliased type");
IsTypeAlias = false;
CanonType = getCanonicalTemplateSpecializationType(Template, Args);
}
// Allocate the (non-canonical) template specialization type, but don't
// try to unique it: these types typically have location information that
// we don't unique and don't want to lose.
void *Mem = Allocate(sizeof(TemplateSpecializationType) +
sizeof(TemplateArgument) * Args.size() +
(IsTypeAlias? sizeof(QualType) : 0),
TypeAlignment);
auto *Spec
= new (Mem) TemplateSpecializationType(Template, Args, CanonType,
IsTypeAlias ? Underlying : QualType());
Types.push_back(Spec);
return QualType(Spec, 0);
}
QualType ASTContext::getCanonicalTemplateSpecializationType(
TemplateName Template, ArrayRef<TemplateArgument> Args) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
// Look through qualified template names.
if (QualifiedTemplateName *QTN = Template.getAsQualifiedTemplateName())
Template = TemplateName(QTN->getUnderlyingTemplate());
// Build the canonical template specialization type.
TemplateName CanonTemplate = getCanonicalTemplateName(Template);
bool AnyNonCanonArgs = false;
auto CanonArgs =
::getCanonicalTemplateArguments(*this, Args, AnyNonCanonArgs);
// Determine whether this canonical template specialization type already
// exists.
llvm::FoldingSetNodeID ID;
TemplateSpecializationType::Profile(ID, CanonTemplate,
CanonArgs, *this);
void *InsertPos = nullptr;
TemplateSpecializationType *Spec
= TemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Spec) {
// Allocate a new canonical template specialization type.
void *Mem = Allocate((sizeof(TemplateSpecializationType) +
sizeof(TemplateArgument) * CanonArgs.size()),
TypeAlignment);
Spec = new (Mem) TemplateSpecializationType(CanonTemplate,
CanonArgs,
QualType(), QualType());
Types.push_back(Spec);
TemplateSpecializationTypes.InsertNode(Spec, InsertPos);
}
assert(Spec->isDependentType() &&
"Non-dependent template-id type must have a canonical type");
return QualType(Spec, 0);
}
QualType ASTContext::getElaboratedType(ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
QualType NamedType,
TagDecl *OwnedTagDecl) const {
llvm::FoldingSetNodeID ID;
ElaboratedType::Profile(ID, Keyword, NNS, NamedType, OwnedTagDecl);
void *InsertPos = nullptr;
ElaboratedType *T = ElaboratedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon = NamedType;
if (!Canon.isCanonical()) {
Canon = getCanonicalType(NamedType);
ElaboratedType *CheckT = ElaboratedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckT && "Elaborated canonical type broken");
(void)CheckT;
}
void *Mem = Allocate(ElaboratedType::totalSizeToAlloc<TagDecl *>(!!OwnedTagDecl),
TypeAlignment);
T = new (Mem) ElaboratedType(Keyword, NNS, NamedType, Canon, OwnedTagDecl);
Types.push_back(T);
ElaboratedTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
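// Usage sketch (illustrative only, assuming an ASTContext &Ctx and a record
// type RT): the written form 'struct RT' with no nested-name-specifier is:
//
//   QualType Elab = Ctx.getElaboratedType(ETK_Struct, /*NNS=*/nullptr, RT);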
QualType
ASTContext::getParenType(QualType InnerType) const {
llvm::FoldingSetNodeID ID;
ParenType::Profile(ID, InnerType);
void *InsertPos = nullptr;
ParenType *T = ParenTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon = InnerType;
if (!Canon.isCanonical()) {
Canon = getCanonicalType(InnerType);
ParenType *CheckT = ParenTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckT && "Paren canonical type broken");
(void)CheckT;
}
T = new (*this, TypeAlignment) ParenType(InnerType, Canon);
Types.push_back(T);
ParenTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
QualType
ASTContext::getMacroQualifiedType(QualType UnderlyingTy,
const IdentifierInfo *MacroII) const {
QualType Canon = UnderlyingTy;
if (!Canon.isCanonical())
Canon = getCanonicalType(UnderlyingTy);
auto *newType = new (*this, TypeAlignment)
MacroQualifiedType(UnderlyingTy, Canon, MacroII);
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
QualType Canon) const {
if (Canon.isNull()) {
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS != NNS)
Canon = getDependentNameType(Keyword, CanonNNS, Name);
}
llvm::FoldingSetNodeID ID;
DependentNameType::Profile(ID, Keyword, NNS, Name);
void *InsertPos = nullptr;
DependentNameType *T
= DependentNameTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
T = new (*this, TypeAlignment) DependentNameType(Keyword, NNS, Name, Canon);
Types.push_back(T);
DependentNameTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
QualType ASTContext::getDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword, NestedNameSpecifier *NNS,
const IdentifierInfo *Name, ArrayRef<TemplateArgumentLoc> Args) const {
// TODO: avoid this copy
SmallVector<TemplateArgument, 16> ArgCopy;
for (unsigned I = 0, E = Args.size(); I != E; ++I)
ArgCopy.push_back(Args[I].getArgument());
return getDependentTemplateSpecializationType(Keyword, NNS, Name, ArgCopy);
}
QualType
ASTContext::getDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
ArrayRef<TemplateArgument> Args) const {
assert((!NNS || NNS->isDependent()) &&
"nested-name-specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateSpecializationType::Profile(ID, *this, Keyword, NNS,
Name, Args);
void *InsertPos = nullptr;
DependentTemplateSpecializationType *T
= DependentTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
ElaboratedTypeKeyword CanonKeyword = Keyword;
if (Keyword == ETK_None) CanonKeyword = ETK_Typename;
bool AnyNonCanonArgs = false;
auto CanonArgs =
::getCanonicalTemplateArguments(*this, Args, AnyNonCanonArgs);
QualType Canon;
if (AnyNonCanonArgs || CanonNNS != NNS || CanonKeyword != Keyword) {
Canon = getDependentTemplateSpecializationType(CanonKeyword, CanonNNS,
Name,
CanonArgs);
// Find the insert position again.
[[maybe_unused]] auto *Nothing =
DependentTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!Nothing && "canonical type broken");
}
void *Mem = Allocate((sizeof(DependentTemplateSpecializationType) +
sizeof(TemplateArgument) * Args.size()),
TypeAlignment);
T = new (Mem) DependentTemplateSpecializationType(Keyword, NNS,
Name, Args, Canon);
Types.push_back(T);
DependentTemplateSpecializationTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) {
TemplateArgument Arg;
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
QualType ArgType = getTypeDeclType(TTP);
if (TTP->isParameterPack())
ArgType = getPackExpansionType(ArgType, std::nullopt);
Arg = TemplateArgument(ArgType);
} else if (auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
QualType T =
NTTP->getType().getNonPackExpansionType().getNonLValueExprType(*this);
// For class NTTPs, ensure we include the 'const' so the type matches that
// of a real template argument.
// FIXME: It would be more faithful to model this as something like an
// lvalue-to-rvalue conversion applied to a const-qualified lvalue.
if (T->isRecordType())
T.addConst();
Expr *E = new (*this) DeclRefExpr(
*this, NTTP, /*RefersToEnclosingVariableOrCapture*/ false, T,
Expr::getValueKindForType(NTTP->getType()), NTTP->getLocation());
if (NTTP->isParameterPack())
E = new (*this)
PackExpansionExpr(DependentTy, E, NTTP->getLocation(), std::nullopt);
Arg = TemplateArgument(E);
} else {
auto *TTP = cast<TemplateTemplateParmDecl>(Param);
if (TTP->isParameterPack())
Arg = TemplateArgument(TemplateName(TTP), std::optional<unsigned>());
else
Arg = TemplateArgument(TemplateName(TTP));
}
if (Param->isTemplateParameterPack())
Arg = TemplateArgument::CreatePackCopy(*this, Arg);
return Arg;
}
void
ASTContext::getInjectedTemplateArgs(const TemplateParameterList *Params,
SmallVectorImpl<TemplateArgument> &Args) {
Args.reserve(Args.size() + Params->size());
for (NamedDecl *Param : *Params)
Args.push_back(getInjectedTemplateArg(Param));
}
QualType ASTContext::getPackExpansionType(QualType Pattern,
std::optional<unsigned> NumExpansions,
bool ExpectPackInType) {
assert((!ExpectPackInType || Pattern->containsUnexpandedParameterPack()) &&
"Pack expansions must expand one or more parameter packs");
llvm::FoldingSetNodeID ID;
PackExpansionType::Profile(ID, Pattern, NumExpansions);
void *InsertPos = nullptr;
PackExpansionType *T = PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon;
if (!Pattern.isCanonical()) {
Canon = getPackExpansionType(getCanonicalType(Pattern), NumExpansions,
/*ExpectPackInType=*/false);
// Find the insert position again, in case we inserted an element into
// PackExpansionTypes and invalidated our insert position.
PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos);
}
T = new (*this, TypeAlignment)
PackExpansionType(Pattern, Canon, NumExpansions);
Types.push_back(T);
PackExpansionTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
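// Usage sketch (illustrative only, assuming an ASTContext &Ctx and a type T
// that contains an unexpanded parameter pack): the pattern 'T...' with an
// unknown number of expansions is:
//
//   QualType Expansion = Ctx.getPackExpansionType(T, std::nullopt);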
/// CmpProtocolNames - Comparison predicate for sorting protocols
/// alphabetically.
static int CmpProtocolNames(ObjCProtocolDecl *const *LHS,
ObjCProtocolDecl *const *RHS) {
return DeclarationName::compare((*LHS)->getDeclName(), (*RHS)->getDeclName());
}
static bool areSortedAndUniqued(ArrayRef<ObjCProtocolDecl *> Protocols) {
if (Protocols.empty()) return true;
if (Protocols[0]->getCanonicalDecl() != Protocols[0])
return false;
for (unsigned i = 1; i != Protocols.size(); ++i)
if (CmpProtocolNames(&Protocols[i - 1], &Protocols[i]) >= 0 ||
Protocols[i]->getCanonicalDecl() != Protocols[i])
return false;
return true;
}
static void
SortAndUniqueProtocols(SmallVectorImpl<ObjCProtocolDecl *> &Protocols) {
// Sort protocols, keyed by name.
llvm::array_pod_sort(Protocols.begin(), Protocols.end(), CmpProtocolNames);
// Canonicalize.
for (ObjCProtocolDecl *&P : Protocols)
P = P->getCanonicalDecl();
// Remove duplicates.
auto ProtocolsEnd = std::unique(Protocols.begin(), Protocols.end());
Protocols.erase(ProtocolsEnd, Protocols.end());
}
QualType ASTContext::getObjCObjectType(QualType BaseType,
ObjCProtocolDecl * const *Protocols,
unsigned NumProtocols) const {
return getObjCObjectType(BaseType, {},
llvm::ArrayRef(Protocols, NumProtocols),
/*isKindOf=*/false);
}
QualType ASTContext::getObjCObjectType(
QualType baseType,
ArrayRef<QualType> typeArgs,
ArrayRef<ObjCProtocolDecl *> protocols,
bool isKindOf) const {
// If the base type is an interface and there aren't any protocols or
// type arguments to add, then the interface type will do just fine.
if (typeArgs.empty() && protocols.empty() && !isKindOf &&
isa<ObjCInterfaceType>(baseType))
return baseType;
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
ObjCObjectTypeImpl::Profile(ID, baseType, typeArgs, protocols, isKindOf);
void *InsertPos = nullptr;
if (ObjCObjectType *QT = ObjCObjectTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(QT, 0);
// Determine the type arguments to be used for canonicalization,
// which may be explicitly specified here or written on the base
// type.
ArrayRef<QualType> effectiveTypeArgs = typeArgs;
if (effectiveTypeArgs.empty()) {
if (const auto *baseObject = baseType->getAs<ObjCObjectType>())
effectiveTypeArgs = baseObject->getTypeArgs();
}
// Build the canonical type, which has the canonical base type and a
// sorted-and-uniqued list of protocols and the type arguments
// canonicalized.
QualType canonical;
bool typeArgsAreCanonical = llvm::all_of(
effectiveTypeArgs, [&](QualType type) { return type.isCanonical(); });
bool protocolsSorted = areSortedAndUniqued(protocols);
if (!typeArgsAreCanonical || !protocolsSorted || !baseType.isCanonical()) {
// Determine the canonical type arguments.
ArrayRef<QualType> canonTypeArgs;
SmallVector<QualType, 4> canonTypeArgsVec;
if (!typeArgsAreCanonical) {
canonTypeArgsVec.reserve(effectiveTypeArgs.size());
for (auto typeArg : effectiveTypeArgs)
canonTypeArgsVec.push_back(getCanonicalType(typeArg));
canonTypeArgs = canonTypeArgsVec;
} else {
canonTypeArgs = effectiveTypeArgs;
}
ArrayRef<ObjCProtocolDecl *> canonProtocols;
SmallVector<ObjCProtocolDecl*, 8> canonProtocolsVec;
if (!protocolsSorted) {
canonProtocolsVec.append(protocols.begin(), protocols.end());
SortAndUniqueProtocols(canonProtocolsVec);
canonProtocols = canonProtocolsVec;
} else {
canonProtocols = protocols;
}
canonical = getObjCObjectType(getCanonicalType(baseType), canonTypeArgs,
canonProtocols, isKindOf);
// Regenerate InsertPos.
ObjCObjectTypes.FindNodeOrInsertPos(ID, InsertPos);
}
unsigned size = sizeof(ObjCObjectTypeImpl);
size += typeArgs.size() * sizeof(QualType);
size += protocols.size() * sizeof(ObjCProtocolDecl *);
void *mem = Allocate(size, TypeAlignment);
auto *T =
new (mem) ObjCObjectTypeImpl(canonical, baseType, typeArgs, protocols,
isKindOf);
Types.push_back(T);
ObjCObjectTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
/// Apply Objective-C protocol qualifiers to the given type.
/// If this is for the canonical type of a type parameter, we can apply
/// protocol qualifiers on the ObjCObjectPointerType.
QualType
ASTContext::applyObjCProtocolQualifiers(QualType type,
ArrayRef<ObjCProtocolDecl *> protocols, bool &hasError,
bool allowOnPointerType) const {
hasError = false;
if (const auto *objT = dyn_cast<ObjCTypeParamType>(type.getTypePtr())) {
return getObjCTypeParamType(objT->getDecl(), protocols);
}
// Apply protocol qualifiers to ObjCObjectPointerType.
if (allowOnPointerType) {
if (const auto *objPtr =
dyn_cast<ObjCObjectPointerType>(type.getTypePtr())) {
const ObjCObjectType *objT = objPtr->getObjectType();
// Merge protocol lists and construct ObjCObjectType.
SmallVector<ObjCProtocolDecl*, 8> protocolsVec;
protocolsVec.append(objT->qual_begin(),
objT->qual_end());
protocolsVec.append(protocols.begin(), protocols.end());
ArrayRef<ObjCProtocolDecl *> protocols = protocolsVec;
type = getObjCObjectType(
objT->getBaseType(),
objT->getTypeArgsAsWritten(),
protocols,
objT->isKindOfTypeAsWritten());
return getObjCObjectPointerType(type);
}
}
// Apply protocol qualifiers to ObjCObjectType.
if (const auto *objT = dyn_cast<ObjCObjectType>(type.getTypePtr())){
// FIXME: Check for protocols to which the class type is already
// known to conform.
return getObjCObjectType(objT->getBaseType(),
objT->getTypeArgsAsWritten(),
protocols,
objT->isKindOfTypeAsWritten());
}
  // If the canonical type is an ObjCObjectType (i.e. the type is sugar for
  // an object type), rebuild it with the given protocols.
if (type->isObjCObjectType()) {
// Silently overwrite any existing protocol qualifiers.
// TODO: determine whether that's the right thing to do.
// FIXME: Check for protocols to which the class type is already
// known to conform.
return getObjCObjectType(type, {}, protocols, false);
}
// id<protocol-list>
if (type->isObjCIdType()) {
const auto *objPtr = type->castAs<ObjCObjectPointerType>();
type = getObjCObjectType(ObjCBuiltinIdTy, {}, protocols,
objPtr->isKindOfType());
return getObjCObjectPointerType(type);
}
// Class<protocol-list>
if (type->isObjCClassType()) {
const auto *objPtr = type->castAs<ObjCObjectPointerType>();
type = getObjCObjectType(ObjCBuiltinClassTy, {}, protocols,
objPtr->isKindOfType());
return getObjCObjectPointerType(type);
}
hasError = true;
return type;
}
QualType
ASTContext::getObjCTypeParamType(const ObjCTypeParamDecl *Decl,
ArrayRef<ObjCProtocolDecl *> protocols) const {
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
ObjCTypeParamType::Profile(ID, Decl, Decl->getUnderlyingType(), protocols);
void *InsertPos = nullptr;
if (ObjCTypeParamType *TypeParam =
ObjCTypeParamTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(TypeParam, 0);
// We canonicalize to the underlying type.
QualType Canonical = getCanonicalType(Decl->getUnderlyingType());
if (!protocols.empty()) {
    // Apply the protocol qualifiers.
bool hasError;
Canonical = getCanonicalType(applyObjCProtocolQualifiers(
Canonical, protocols, hasError, true /*allowOnPointerType*/));
    assert(!hasError && "Error when applying protocol qualifiers to bound type");
}
unsigned size = sizeof(ObjCTypeParamType);
size += protocols.size() * sizeof(ObjCProtocolDecl *);
void *mem = Allocate(size, TypeAlignment);
auto *newType = new (mem) ObjCTypeParamType(Decl, Canonical, protocols);
Types.push_back(newType);
ObjCTypeParamTypes.InsertNode(newType, InsertPos);
return QualType(newType, 0);
}
void ASTContext::adjustObjCTypeParamBoundType(const ObjCTypeParamDecl *Orig,
ObjCTypeParamDecl *New) const {
New->setTypeSourceInfo(getTrivialTypeSourceInfo(Orig->getUnderlyingType()));
// Update TypeForDecl after updating TypeSourceInfo.
auto NewTypeParamTy = cast<ObjCTypeParamType>(New->getTypeForDecl());
SmallVector<ObjCProtocolDecl *, 8> protocols;
protocols.append(NewTypeParamTy->qual_begin(), NewTypeParamTy->qual_end());
QualType UpdatedTy = getObjCTypeParamType(New, protocols);
New->setTypeForDecl(UpdatedTy.getTypePtr());
}
/// ObjCObjectAdoptsQTypeProtocols - Checks that protocols in IC's
/// protocol list adopt all protocols in QT's qualified-id protocol
/// list.
bool ASTContext::ObjCObjectAdoptsQTypeProtocols(QualType QT,
ObjCInterfaceDecl *IC) {
if (!QT->isObjCQualifiedIdType())
return false;
if (const auto *OPT = QT->getAs<ObjCObjectPointerType>()) {
// If both the right and left sides have qualifiers.
for (auto *Proto : OPT->quals()) {
if (!IC->ClassImplementsProtocol(Proto, false))
return false;
}
return true;
}
return false;
}
/// QIdProtocolsAdoptObjCObjectProtocols - Checks that protocols in
/// QT's qualified-id protocol list adopt all protocols in IDecl's list
/// of protocols.
bool ASTContext::QIdProtocolsAdoptObjCObjectProtocols(QualType QT,
ObjCInterfaceDecl *IDecl) {
if (!QT->isObjCQualifiedIdType())
return false;
const auto *OPT = QT->getAs<ObjCObjectPointerType>();
if (!OPT)
return false;
if (!IDecl->hasDefinition())
return false;
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> InheritedProtocols;
CollectInheritedProtocols(IDecl, InheritedProtocols);
if (InheritedProtocols.empty())
return false;
  // If every protocol in QT's id<plist> conforms to some protocol that IDecl
  // inherits, then bridge casting is OK.
bool Conforms = false;
for (auto *Proto : OPT->quals()) {
Conforms = false;
for (auto *PI : InheritedProtocols) {
if (ProtocolCompatibleWithProtocol(Proto, PI)) {
Conforms = true;
break;
}
}
if (!Conforms)
break;
}
if (Conforms)
return true;
for (auto *PI : InheritedProtocols) {
// If both the right and left sides have qualifiers.
bool Adopts = false;
for (auto *Proto : OPT->quals()) {
// return 'true' if 'PI' is in the inheritance hierarchy of Proto
if ((Adopts = ProtocolCompatibleWithProtocol(PI, Proto)))
break;
}
if (!Adopts)
return false;
}
return true;
}
/// getObjCObjectPointerType - Return an ObjCObjectPointerType for the given
/// object type.
QualType ASTContext::getObjCObjectPointerType(QualType ObjectT) const {
llvm::FoldingSetNodeID ID;
ObjCObjectPointerType::Profile(ID, ObjectT);
void *InsertPos = nullptr;
if (ObjCObjectPointerType *QT =
ObjCObjectPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(QT, 0);
// Find the canonical object type.
QualType Canonical;
if (!ObjectT.isCanonical()) {
Canonical = getObjCObjectPointerType(getCanonicalType(ObjectT));
// Regenerate InsertPos.
ObjCObjectPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
}
// No match.
void *Mem = Allocate(sizeof(ObjCObjectPointerType), TypeAlignment);
auto *QType =
new (Mem) ObjCObjectPointerType(Canonical, ObjectT);
Types.push_back(QType);
ObjCObjectPointerTypes.InsertNode(QType, InsertPos);
return QualType(QType, 0);
}
/// getObjCInterfaceType - Return the unique reference to the type for the
/// specified ObjC interface decl. The list of protocols is optional.
QualType ASTContext::getObjCInterfaceType(const ObjCInterfaceDecl *Decl,
ObjCInterfaceDecl *PrevDecl) const {
if (Decl->TypeForDecl)
return QualType(Decl->TypeForDecl, 0);
if (PrevDecl) {
assert(PrevDecl->TypeForDecl && "previous decl has no TypeForDecl");
Decl->TypeForDecl = PrevDecl->TypeForDecl;
return QualType(PrevDecl->TypeForDecl, 0);
}
// Prefer the definition, if there is one.
if (const ObjCInterfaceDecl *Def = Decl->getDefinition())
Decl = Def;
void *Mem = Allocate(sizeof(ObjCInterfaceType), TypeAlignment);
auto *T = new (Mem) ObjCInterfaceType(Decl);
Decl->TypeForDecl = T;
Types.push_back(T);
return QualType(T, 0);
}
/// getTypeOfExprType - Unlike many "get<Type>" functions, we can't unique
/// TypeOfExprType ASTs (since expressions are never shared). For example,
/// multiple declarations that refer to "typeof(x)" all contain different
/// DeclRefExprs. This doesn't affect the type checker, since it operates
/// on canonical types (which are always unique).
QualType ASTContext::getTypeOfExprType(Expr *tofExpr, TypeOfKind Kind) const {
TypeOfExprType *toe;
if (tofExpr->isTypeDependent()) {
llvm::FoldingSetNodeID ID;
DependentTypeOfExprType::Profile(ID, *this, tofExpr,
Kind == TypeOfKind::Unqualified);
void *InsertPos = nullptr;
DependentTypeOfExprType *Canon =
DependentTypeOfExprTypes.FindNodeOrInsertPos(ID, InsertPos);
if (Canon) {
// We already have a "canonical" version of an identical, dependent
// typeof(expr) type. Use that as our canonical type.
toe = new (*this, TypeAlignment)
TypeOfExprType(tofExpr, Kind, QualType((TypeOfExprType *)Canon, 0));
} else {
// Build a new, canonical typeof(expr) type.
Canon = new (*this, TypeAlignment)
DependentTypeOfExprType(*this, tofExpr, Kind);
DependentTypeOfExprTypes.InsertNode(Canon, InsertPos);
toe = Canon;
}
} else {
QualType Canonical = getCanonicalType(tofExpr->getType());
toe = new (*this, TypeAlignment) TypeOfExprType(tofExpr, Kind, Canonical);
}
Types.push_back(toe);
return QualType(toe, 0);
}
/// getTypeOfType - Unlike many "get<Type>" functions, we don't unique
/// TypeOfType nodes. The only motivation to unique these nodes would be
/// memory savings. Since typeof(t) is fairly uncommon, space shouldn't be
/// an issue. This doesn't affect the type checker, since it operates
/// on canonical types (which are always unique).
QualType ASTContext::getTypeOfType(QualType tofType, TypeOfKind Kind) const {
QualType Canonical = getCanonicalType(tofType);
auto *tot =
new (*this, TypeAlignment) TypeOfType(tofType, Canonical, Kind);
Types.push_back(tot);
return QualType(tot, 0);
}
/// getReferenceQualifiedType - Given an expr, will return the type for
/// that expression, as in [dcl.type.simple]p4 but without taking id-expressions
/// and class member access into account.
QualType ASTContext::getReferenceQualifiedType(const Expr *E) const {
// C++11 [dcl.type.simple]p4:
// [...]
QualType T = E->getType();
switch (E->getValueKind()) {
// - otherwise, if e is an xvalue, decltype(e) is T&&, where T is the
// type of e;
case VK_XValue:
return getRValueReferenceType(T);
// - otherwise, if e is an lvalue, decltype(e) is T&, where T is the
// type of e;
case VK_LValue:
return getLValueReferenceType(T);
// - otherwise, decltype(e) is the type of e.
case VK_PRValue:
return T;
}
llvm_unreachable("Unknown value kind");
}
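// Worked example (illustrative only): given 'int x;', the id-expression 'x'
// is an lvalue of type 'int', so the reference-qualified type is 'int &'; a
// call to a function returning 'int &&' is an xvalue, yielding 'int &&'; and
// a prvalue such as '42' yields plain 'int'.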
/// Unlike many "get<Type>" functions, we don't unique DecltypeType
/// nodes: uniquing would never be helpful, since each such type has its
/// own expression, and the memory saving would be insignificant next to
/// the Expr tree hanging under each such type.
QualType ASTContext::getDecltypeType(Expr *e, QualType UnderlyingType) const {
DecltypeType *dt;
// C++11 [temp.type]p2:
// If an expression e involves a template parameter, decltype(e) denotes a
// unique dependent type. Two such decltype-specifiers refer to the same
// type only if their expressions are equivalent (14.5.6.1).
if (e->isInstantiationDependent()) {
llvm::FoldingSetNodeID ID;
DependentDecltypeType::Profile(ID, *this, e);
void *InsertPos = nullptr;
DependentDecltypeType *Canon
= DependentDecltypeTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
// Build a new, canonical decltype(expr) type.
Canon = new (*this, TypeAlignment) DependentDecltypeType(*this, e);
DependentDecltypeTypes.InsertNode(Canon, InsertPos);
}
dt = new (*this, TypeAlignment)
DecltypeType(e, UnderlyingType, QualType((DecltypeType *)Canon, 0));
} else {
dt = new (*this, TypeAlignment)
DecltypeType(e, UnderlyingType, getCanonicalType(UnderlyingType));
}
Types.push_back(dt);
return QualType(dt, 0);
}
/// getUnaryTransformType - We don't unique these, since the memory
/// savings are minimal and these are rare.
QualType ASTContext::getUnaryTransformType(QualType BaseType,
QualType UnderlyingType,
UnaryTransformType::UTTKind Kind)
const {
UnaryTransformType *ut = nullptr;
if (BaseType->isDependentType()) {
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
DependentUnaryTransformType::Profile(ID, getCanonicalType(BaseType), Kind);
void *InsertPos = nullptr;
DependentUnaryTransformType *Canon
= DependentUnaryTransformTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
// Build a new, canonical __underlying_type(type) type.
Canon = new (*this, TypeAlignment)
DependentUnaryTransformType(*this, getCanonicalType(BaseType),
Kind);
DependentUnaryTransformTypes.InsertNode(Canon, InsertPos);
}
ut = new (*this, TypeAlignment) UnaryTransformType (BaseType,
QualType(), Kind,
QualType(Canon, 0));
} else {
QualType CanonType = getCanonicalType(UnderlyingType);
ut = new (*this, TypeAlignment) UnaryTransformType (BaseType,
UnderlyingType, Kind,
CanonType);
}
Types.push_back(ut);
return QualType(ut, 0);
}
QualType ASTContext::getAutoTypeInternal(
QualType DeducedType, AutoTypeKeyword Keyword, bool IsDependent,
bool IsPack, ConceptDecl *TypeConstraintConcept,
ArrayRef<TemplateArgument> TypeConstraintArgs, bool IsCanon) const {
if (DeducedType.isNull() && Keyword == AutoTypeKeyword::Auto &&
!TypeConstraintConcept && !IsDependent)
return getAutoDeductType();
// Look in the folding set for an existing type.
void *InsertPos = nullptr;
llvm::FoldingSetNodeID ID;
AutoType::Profile(ID, *this, DeducedType, Keyword, IsDependent,
TypeConstraintConcept, TypeConstraintArgs);
if (AutoType *AT = AutoTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(AT, 0);
QualType Canon;
if (!IsCanon) {
if (!DeducedType.isNull()) {
Canon = DeducedType.getCanonicalType();
} else if (TypeConstraintConcept) {
Canon = getAutoTypeInternal(QualType(), Keyword, IsDependent, IsPack,
nullptr, {}, true);
// Find the insert position again.
[[maybe_unused]] auto *Nothing =
AutoTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!Nothing && "canonical type broken");
}
}
void *Mem = Allocate(sizeof(AutoType) +
sizeof(TemplateArgument) * TypeConstraintArgs.size(),
TypeAlignment);
auto *AT = new (Mem) AutoType(
DeducedType, Keyword,
(IsDependent ? TypeDependence::DependentInstantiation
: TypeDependence::None) |
(IsPack ? TypeDependence::UnexpandedPack : TypeDependence::None),
Canon, TypeConstraintConcept, TypeConstraintArgs);
Types.push_back(AT);
AutoTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
/// getAutoType - Return the uniqued reference to the 'auto' type which has been
/// deduced to the given type, or to the canonical undeduced 'auto' type, or the
/// canonical deduced-but-dependent 'auto' type.
QualType
ASTContext::getAutoType(QualType DeducedType, AutoTypeKeyword Keyword,
bool IsDependent, bool IsPack,
ConceptDecl *TypeConstraintConcept,
ArrayRef<TemplateArgument> TypeConstraintArgs) const {
assert((!IsPack || IsDependent) && "only use IsPack for a dependent pack");
assert((!IsDependent || DeducedType.isNull()) &&
"A dependent auto should be undeduced");
return getAutoTypeInternal(DeducedType, Keyword, IsDependent, IsPack,
TypeConstraintConcept, TypeConstraintArgs);
}
/// Return the uniqued reference to the deduced template specialization type
/// which has been deduced to the given type, or to the canonical undeduced
/// such type, or the canonical deduced-but-dependent such type.
QualType ASTContext::getDeducedTemplateSpecializationType(
TemplateName Template, QualType DeducedType, bool IsDependent) const {
// Look in the folding set for an existing type.
void *InsertPos = nullptr;
llvm::FoldingSetNodeID ID;
DeducedTemplateSpecializationType::Profile(ID, Template, DeducedType,
IsDependent);
if (DeducedTemplateSpecializationType *DTST =
DeducedTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(DTST, 0);
auto *DTST = new (*this, TypeAlignment)
DeducedTemplateSpecializationType(Template, DeducedType, IsDependent);
llvm::FoldingSetNodeID TempID;
DTST->Profile(TempID);
assert(ID == TempID && "ID does not match");
Types.push_back(DTST);
DeducedTemplateSpecializationTypes.InsertNode(DTST, InsertPos);
return QualType(DTST, 0);
}
/// getAtomicType - Return the uniqued reference to the atomic type for
/// the given value type.
QualType ASTContext::getAtomicType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
AtomicType::Profile(ID, T);
void *InsertPos = nullptr;
if (AtomicType *AT = AtomicTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(AT, 0);
// If the atomic value type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getAtomicType(getCanonicalType(T));
// Get the new insert position for the node we care about.
AtomicType *NewIP = AtomicTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) AtomicType(T, Canonical);
Types.push_back(New);
AtomicTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
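// Usage sketch (illustrative only, assuming an ASTContext &Ctx):
// '_Atomic(int)' is built and uniqued as:
//
//   QualType AI = Ctx.getAtomicType(Ctx.IntTy);
//   assert(Ctx.getAtomicType(Ctx.IntTy) == AI);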
/// getAutoDeductType - Get type pattern for deducing against 'auto'.
QualType ASTContext::getAutoDeductType() const {
if (AutoDeductTy.isNull())
AutoDeductTy = QualType(new (*this, TypeAlignment)
AutoType(QualType(), AutoTypeKeyword::Auto,
TypeDependence::None, QualType(),
/*concept*/ nullptr, /*args*/ {}),
0);
return AutoDeductTy;
}
/// getAutoRRefDeductType - Get type pattern for deducing against 'auto &&'.
QualType ASTContext::getAutoRRefDeductType() const {
if (AutoRRefDeductTy.isNull())
AutoRRefDeductTy = getRValueReferenceType(getAutoDeductType());
assert(!AutoRRefDeductTy.isNull() && "can't build 'auto &&' pattern");
return AutoRRefDeductTy;
}
/// getTagDeclType - Return the unique reference to the type for the
/// specified TagDecl (struct/union/class/enum) decl.
QualType ASTContext::getTagDeclType(const TagDecl *Decl) const {
assert(Decl);
// FIXME: What is the design on getTagDeclType when it requires casting
// away const? mutable?
return getTypeDeclType(const_cast<TagDecl*>(Decl));
}
/// getSizeType - Return the unique type for "size_t" (C99 7.17), the result
/// of the sizeof operator (C99 6.5.3.4p4). The value is target dependent and
/// needs to agree with the definition in <stddef.h>.
CanQualType ASTContext::getSizeType() const {
return getFromTargetType(Target->getSizeType());
}
/// Return the unique signed counterpart of the integer type
/// corresponding to size_t.
CanQualType ASTContext::getSignedSizeType() const {
return getFromTargetType(Target->getSignedSizeType());
}
/// getIntMaxType - Return the unique type for "intmax_t" (C99 7.18.1.5).
CanQualType ASTContext::getIntMaxType() const {
return getFromTargetType(Target->getIntMaxType());
}
/// getUIntMaxType - Return the unique type for "uintmax_t" (C99 7.18.1.5).
CanQualType ASTContext::getUIntMaxType() const {
return getFromTargetType(Target->getUIntMaxType());
}
/// getSignedWCharType - Return the type of "signed wchar_t".
/// Used when in C++, as a GCC extension.
QualType ASTContext::getSignedWCharType() const {
// FIXME: derive from "Target" ?
return WCharTy;
}
/// getUnsignedWCharType - Return the type of "unsigned wchar_t".
/// Used when in C++, as a GCC extension.
QualType ASTContext::getUnsignedWCharType() const {
// FIXME: derive from "Target" ?
return UnsignedIntTy;
}
QualType ASTContext::getIntPtrType() const {
return getFromTargetType(Target->getIntPtrType());
}
QualType ASTContext::getUIntPtrType() const {
return getCorrespondingUnsignedType(getIntPtrType());
}
/// getPointerDiffType - Return the unique type for "ptrdiff_t" (C99 7.17)
/// defined in <stddef.h>. Pointer - pointer requires this (C99 6.5.6p9).
QualType ASTContext::getPointerDiffType() const {
return getFromTargetType(Target->getPtrDiffType(LangAS::Default));
}
/// Return the unique unsigned counterpart of "ptrdiff_t"
/// integer type. The standard (C11 7.21.6.1p7) refers to this type
/// in the definition of the %tu format specifier.
QualType ASTContext::getUnsignedPointerDiffType() const {
return getFromTargetType(Target->getUnsignedPtrDiffType(LangAS::Default));
}
/// Return the unique type for "pid_t" defined in
/// <sys/types.h>. We need this to compute the correct type for vfork().
QualType ASTContext::getProcessIDType() const {
return getFromTargetType(Target->getProcessIDType());
}
//===----------------------------------------------------------------------===//
// Type Operators
//===----------------------------------------------------------------------===//
CanQualType ASTContext::getCanonicalParamType(QualType T) const {
// Push qualifiers into arrays, and then discard any remaining
// qualifiers.
T = getCanonicalType(T);
T = getVariableArrayDecayedType(T);
const Type *Ty = T.getTypePtr();
QualType Result;
if (isa<ArrayType>(Ty)) {
Result = getArrayDecayedType(QualType(Ty,0));
} else if (isa<FunctionType>(Ty)) {
Result = getPointerType(QualType(Ty, 0));
} else {
Result = QualType(Ty, 0);
}
return CanQualType::CreateUnsafe(Result);
}
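// Example (editor's illustration; assumes an ASTContext &Ctx): parameter
// canonicalization applies the usual array-to-pointer and
// function-to-pointer decay:
//
//   QualType Arr = Ctx.getConstantArrayType(
//       Ctx.IntTy, llvm::APInt(32, 10), /*SizeExpr=*/nullptr,
//       ArrayType::Normal, /*IndexTypeQuals=*/0);   // int[10]
//   CanQualType P = Ctx.getCanonicalParamType(Arr); // 'int *'
//   CanQualType F = Ctx.getCanonicalParamType(
//       Ctx.getFunctionNoProtoType(Ctx.VoidTy));    // 'void (*)()'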
QualType ASTContext::getUnqualifiedArrayType(QualType type,
Qualifiers &quals) {
SplitQualType splitType = type.getSplitUnqualifiedType();
// FIXME: getSplitUnqualifiedType() actually walks all the way to
// the unqualified desugared type and then drops it on the floor.
// We then have to strip that sugar back off with
// getUnqualifiedDesugaredType(), which is silly.
const auto *AT =
dyn_cast<ArrayType>(splitType.Ty->getUnqualifiedDesugaredType());
// If we don't have an array, just use the results in splitType.
if (!AT) {
quals = splitType.Quals;
return QualType(splitType.Ty, 0);
}
// Otherwise, recurse on the array's element type.
QualType elementType = AT->getElementType();
QualType unqualElementType = getUnqualifiedArrayType(elementType, quals);
// If that didn't change the element type, AT has no qualifiers, so we
// can just use the results in splitType.
if (elementType == unqualElementType) {
assert(quals.empty()); // from the recursive call
quals = splitType.Quals;
return QualType(splitType.Ty, 0);
}
// Otherwise, add in the qualifiers from the outermost type, then
// build the type back up.
quals.addConsistentQualifiers(splitType.Quals);
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
return getConstantArrayType(unqualElementType, CAT->getSize(),
CAT->getSizeExpr(), CAT->getSizeModifier(), 0);
}
if (const auto *IAT = dyn_cast<IncompleteArrayType>(AT)) {
return getIncompleteArrayType(unqualElementType, IAT->getSizeModifier(), 0);
}
if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) {
return getVariableArrayType(unqualElementType,
VAT->getSizeExpr(),
VAT->getSizeModifier(),
VAT->getIndexTypeCVRQualifiers(),
VAT->getBracketsRange());
}
const auto *DSAT = cast<DependentSizedArrayType>(AT);
return getDependentSizedArrayType(unqualElementType, DSAT->getSizeExpr(),
DSAT->getSizeModifier(), 0,
SourceRange());
}
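// Example (editor's illustration; assumes an ASTContext &Ctx and a QualType
// ConstIntArr for 'const int[10]'): the element qualifier is stripped and
// reported through the out-parameter:
//
//   Qualifiers Qs;
//   QualType U = Ctx.getUnqualifiedArrayType(ConstIntArr, Qs);
//   // U is 'int[10]'; Qs now holds 'const'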
/// Attempt to unwrap two types that may both be array types with the same bound
/// (or both be array types of unknown bound) for the purpose of comparing the
/// cv-decomposition of two types per C++ [conv.qual].
///
/// \param AllowPiMismatch Allow the Pi1 and Pi2 to differ as described in
/// C++20 [conv.qual], if permitted by the current language mode.
void ASTContext::UnwrapSimilarArrayTypes(QualType &T1, QualType &T2,
bool AllowPiMismatch) {
while (true) {
auto *AT1 = getAsArrayType(T1);
if (!AT1)
return;
auto *AT2 = getAsArrayType(T2);
if (!AT2)
return;
// If we don't have two array types with the same constant bound nor two
// incomplete array types, we've unwrapped everything we can.
// C++20 also permits one type to be a constant array type and the other
// to be an incomplete array type.
// FIXME: Consider also unwrapping array of unknown bound and VLA.
if (auto *CAT1 = dyn_cast<ConstantArrayType>(AT1)) {
auto *CAT2 = dyn_cast<ConstantArrayType>(AT2);
if (!((CAT2 && CAT1->getSize() == CAT2->getSize()) ||
(AllowPiMismatch && getLangOpts().CPlusPlus20 &&
isa<IncompleteArrayType>(AT2))))
return;
} else if (isa<IncompleteArrayType>(AT1)) {
if (!(isa<IncompleteArrayType>(AT2) ||
(AllowPiMismatch && getLangOpts().CPlusPlus20 &&
isa<ConstantArrayType>(AT2))))
return;
} else {
return;
}
T1 = AT1->getElementType();
T2 = AT2->getElementType();
}
}
/// Attempt to unwrap two types that may be similar (C++ [conv.qual]).
///
/// If T1 and T2 are both pointer types of the same kind, or both array types
/// with the same bound, unwraps layers from T1 and T2 until a pointer type is
/// unwrapped. Top-level qualifiers on T1 and T2 are ignored.
///
/// This function will typically be called in a loop that successively
/// "unwraps" pointer and pointer-to-member types to compare them at each
/// level.
///
/// \param AllowPiMismatch Allow the Pi1 and Pi2 to differ as described in
/// C++20 [conv.qual], if permitted by the current language mode.
///
/// \return \c true if a pointer type was unwrapped, \c false if we reached a
/// pair of types that can't be unwrapped further.
bool ASTContext::UnwrapSimilarTypes(QualType &T1, QualType &T2,
bool AllowPiMismatch) {
UnwrapSimilarArrayTypes(T1, T2, AllowPiMismatch);
const auto *T1PtrType = T1->getAs<PointerType>();
const auto *T2PtrType = T2->getAs<PointerType>();
if (T1PtrType && T2PtrType) {
T1 = T1PtrType->getPointeeType();
T2 = T2PtrType->getPointeeType();
return true;
}
const auto *T1MPType = T1->getAs<MemberPointerType>();
const auto *T2MPType = T2->getAs<MemberPointerType>();
if (T1MPType && T2MPType &&
hasSameUnqualifiedType(QualType(T1MPType->getClass(), 0),
QualType(T2MPType->getClass(), 0))) {
T1 = T1MPType->getPointeeType();
T2 = T2MPType->getPointeeType();
return true;
}
if (getLangOpts().ObjC) {
const auto *T1OPType = T1->getAs<ObjCObjectPointerType>();
const auto *T2OPType = T2->getAs<ObjCObjectPointerType>();
if (T1OPType && T2OPType) {
T1 = T1OPType->getPointeeType();
T2 = T2OPType->getPointeeType();
return true;
}
}
// FIXME: Block pointers, too?
return false;
}
bool ASTContext::hasSimilarType(QualType T1, QualType T2) {
while (true) {
Qualifiers Quals;
T1 = getUnqualifiedArrayType(T1, Quals);
T2 = getUnqualifiedArrayType(T2, Quals);
if (hasSameType(T1, T2))
return true;
if (!UnwrapSimilarTypes(T1, T2))
return false;
}
}
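// Example (editor's illustration; assumes an ASTContext &Ctx): similarity
// ignores cv-qualifiers at every level of the decomposition, so 'int **'
// is similar to 'const int **' (but not to 'float **'):
//
//   QualType IntPP = Ctx.getPointerType(Ctx.getPointerType(Ctx.IntTy));
//   QualType CIntPP =
//       Ctx.getPointerType(Ctx.getPointerType(Ctx.IntTy.withConst()));
//   assert(Ctx.hasSimilarType(IntPP, CIntPP));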
bool ASTContext::hasCvrSimilarType(QualType T1, QualType T2) {
while (true) {
Qualifiers Quals1, Quals2;
T1 = getUnqualifiedArrayType(T1, Quals1);
T2 = getUnqualifiedArrayType(T2, Quals2);
Quals1.removeCVRQualifiers();
Quals2.removeCVRQualifiers();
if (Quals1 != Quals2)
return false;
if (hasSameType(T1, T2))
return true;
if (!UnwrapSimilarTypes(T1, T2, /*AllowPiMismatch*/ false))
return false;
}
}
DeclarationNameInfo
ASTContext::getNameForTemplate(TemplateName Name,
SourceLocation NameLoc) const {
switch (Name.getKind()) {
case TemplateName::QualifiedTemplate:
case TemplateName::Template:
// DNInfo work in progress: CHECKME: what about DNLoc?
return DeclarationNameInfo(Name.getAsTemplateDecl()->getDeclName(),
NameLoc);
case TemplateName::OverloadedTemplate: {
OverloadedTemplateStorage *Storage = Name.getAsOverloadedTemplate();
// DNInfo work in progress: CHECKME: what about DNLoc?
return DeclarationNameInfo((*Storage->begin())->getDeclName(), NameLoc);
}
case TemplateName::AssumedTemplate: {
AssumedTemplateStorage *Storage = Name.getAsAssumedTemplateName();
return DeclarationNameInfo(Storage->getDeclName(), NameLoc);
}
case TemplateName::DependentTemplate: {
DependentTemplateName *DTN = Name.getAsDependentTemplateName();
DeclarationName DName;
if (DTN->isIdentifier()) {
DName = DeclarationNames.getIdentifier(DTN->getIdentifier());
return DeclarationNameInfo(DName, NameLoc);
} else {
DName = DeclarationNames.getCXXOperatorName(DTN->getOperator());
// DNInfo work in progress: FIXME: source locations?
DeclarationNameLoc DNLoc =
DeclarationNameLoc::makeCXXOperatorNameLoc(SourceRange());
return DeclarationNameInfo(DName, NameLoc, DNLoc);
}
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
return DeclarationNameInfo(subst->getParameter()->getDeclName(),
NameLoc);
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *subst
= Name.getAsSubstTemplateTemplateParmPack();
return DeclarationNameInfo(subst->getParameterPack()->getDeclName(),
NameLoc);
}
case TemplateName::UsingTemplate:
return DeclarationNameInfo(Name.getAsUsingShadowDecl()->getDeclName(),
NameLoc);
}
llvm_unreachable("bad template name kind!");
}
TemplateName
ASTContext::getCanonicalTemplateName(const TemplateName &Name) const {
switch (Name.getKind()) {
case TemplateName::UsingTemplate:
case TemplateName::QualifiedTemplate:
case TemplateName::Template: {
TemplateDecl *Template = Name.getAsTemplateDecl();
if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(Template))
Template = getCanonicalTemplateTemplateParmDecl(TTP);
// The canonical template name is the canonical template declaration.
return TemplateName(cast<TemplateDecl>(Template->getCanonicalDecl()));
}
case TemplateName::OverloadedTemplate:
case TemplateName::AssumedTemplate:
llvm_unreachable("cannot canonicalize unresolved template");
case TemplateName::DependentTemplate: {
DependentTemplateName *DTN = Name.getAsDependentTemplateName();
assert(DTN && "Non-dependent template names must refer to template decls.");
return DTN->CanonicalTemplateName;
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
return getCanonicalTemplateName(subst->getReplacement());
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *subst =
Name.getAsSubstTemplateTemplateParmPack();
TemplateArgument canonArgPack =
getCanonicalTemplateArgument(subst->getArgumentPack());
return getSubstTemplateTemplateParmPack(
canonArgPack, subst->getAssociatedDecl()->getCanonicalDecl(),
subst->getFinal(), subst->getIndex());
}
}
llvm_unreachable("bad template name!");
}
bool ASTContext::hasSameTemplateName(const TemplateName &X,
const TemplateName &Y) const {
return getCanonicalTemplateName(X).getAsVoidPointer() ==
getCanonicalTemplateName(Y).getAsVoidPointer();
}
bool ASTContext::isSameConstraintExpr(const Expr *XCE, const Expr *YCE) const {
if (!XCE != !YCE)
return false;
if (!XCE)
return true;
llvm::FoldingSetNodeID XCEID, YCEID;
XCE->Profile(XCEID, *this, /*Canonical=*/true);
YCE->Profile(YCEID, *this, /*Canonical=*/true);
return XCEID == YCEID;
}
bool ASTContext::isSameTypeConstraint(const TypeConstraint *XTC,
const TypeConstraint *YTC) const {
if (!XTC != !YTC)
return false;
if (!XTC)
return true;
auto *NCX = XTC->getNamedConcept();
auto *NCY = YTC->getNamedConcept();
if (!NCX || !NCY || !isSameEntity(NCX, NCY))
return false;
if (XTC->hasExplicitTemplateArgs() != YTC->hasExplicitTemplateArgs())
return false;
if (XTC->hasExplicitTemplateArgs())
if (XTC->getTemplateArgsAsWritten()->NumTemplateArgs !=
YTC->getTemplateArgsAsWritten()->NumTemplateArgs)
return false;
// Compare slowly by profiling.
//
// We cannot compare the profiling results for the template
// args here directly. Consider the following example in different modules:
//
// template <__integer_like _Tp, C<_Tp> Sentinel>
// constexpr _Tp operator()(_Tp &&__t, Sentinel &&last) const {
// return __t;
// }
//
// When we compare the profiling result for `C<_Tp>` across modules, it
// compares the type of `_Tp` in each module; those `_Tp`s naturally refer
// to different types. So we cannot compare the profiling results for the
// template args directly.
return isSameConstraintExpr(XTC->getImmediatelyDeclaredConstraint(),
YTC->getImmediatelyDeclaredConstraint());
}
bool ASTContext::isSameTemplateParameter(const NamedDecl *X,
const NamedDecl *Y) const {
if (X->getKind() != Y->getKind())
return false;
if (auto *TX = dyn_cast<TemplateTypeParmDecl>(X)) {
auto *TY = cast<TemplateTypeParmDecl>(Y);
if (TX->isParameterPack() != TY->isParameterPack())
return false;
if (TX->hasTypeConstraint() != TY->hasTypeConstraint())
return false;
return isSameTypeConstraint(TX->getTypeConstraint(),
TY->getTypeConstraint());
}
if (auto *TX = dyn_cast<NonTypeTemplateParmDecl>(X)) {
auto *TY = cast<NonTypeTemplateParmDecl>(Y);
return TX->isParameterPack() == TY->isParameterPack() &&
TX->getASTContext().hasSameType(TX->getType(), TY->getType()) &&
isSameConstraintExpr(TX->getPlaceholderTypeConstraint(),
TY->getPlaceholderTypeConstraint());
}
auto *TX = cast<TemplateTemplateParmDecl>(X);
auto *TY = cast<TemplateTemplateParmDecl>(Y);
return TX->isParameterPack() == TY->isParameterPack() &&
isSameTemplateParameterList(TX->getTemplateParameters(),
TY->getTemplateParameters());
}
bool ASTContext::isSameTemplateParameterList(
const TemplateParameterList *X, const TemplateParameterList *Y) const {
if (X->size() != Y->size())
return false;
for (unsigned I = 0, N = X->size(); I != N; ++I)
if (!isSameTemplateParameter(X->getParam(I), Y->getParam(I)))
return false;
return isSameConstraintExpr(X->getRequiresClause(), Y->getRequiresClause());
}
bool ASTContext::isSameDefaultTemplateArgument(const NamedDecl *X,
const NamedDecl *Y) const {
// If the type parameter isn't the same already, we don't need to check the
// default argument further.
if (!isSameTemplateParameter(X, Y))
return false;
if (auto *TTPX = dyn_cast<TemplateTypeParmDecl>(X)) {
auto *TTPY = cast<TemplateTypeParmDecl>(Y);
if (!TTPX->hasDefaultArgument() || !TTPY->hasDefaultArgument())
return false;
return hasSameType(TTPX->getDefaultArgument(), TTPY->getDefaultArgument());
}
if (auto *NTTPX = dyn_cast<NonTypeTemplateParmDecl>(X)) {
auto *NTTPY = cast<NonTypeTemplateParmDecl>(Y);
if (!NTTPX->hasDefaultArgument() || !NTTPY->hasDefaultArgument())
return false;
Expr *DefaultArgumentX = NTTPX->getDefaultArgument()->IgnoreImpCasts();
Expr *DefaultArgumentY = NTTPY->getDefaultArgument()->IgnoreImpCasts();
llvm::FoldingSetNodeID XID, YID;
DefaultArgumentX->Profile(XID, *this, /*Canonical=*/true);
DefaultArgumentY->Profile(YID, *this, /*Canonical=*/true);
return XID == YID;
}
auto *TTPX = cast<TemplateTemplateParmDecl>(X);
auto *TTPY = cast<TemplateTemplateParmDecl>(Y);
if (!TTPX->hasDefaultArgument() || !TTPY->hasDefaultArgument())
return false;
const TemplateArgument &TAX = TTPX->getDefaultArgument().getArgument();
const TemplateArgument &TAY = TTPY->getDefaultArgument().getArgument();
return hasSameTemplateName(TAX.getAsTemplate(), TAY.getAsTemplate());
}
static NamespaceDecl *getNamespace(const NestedNameSpecifier *X) {
if (auto *NS = X->getAsNamespace())
return NS;
if (auto *NAS = X->getAsNamespaceAlias())
return NAS->getNamespace();
return nullptr;
}
static bool isSameQualifier(const NestedNameSpecifier *X,
const NestedNameSpecifier *Y) {
if (auto *NSX = getNamespace(X)) {
auto *NSY = getNamespace(Y);
if (!NSY || NSX->getCanonicalDecl() != NSY->getCanonicalDecl())
return false;
} else if (X->getKind() != Y->getKind())
return false;
// FIXME: For namespaces and types, we're permitted to check that the entity
// is named via the same tokens. We should probably do so.
switch (X->getKind()) {
case NestedNameSpecifier::Identifier:
if (X->getAsIdentifier() != Y->getAsIdentifier())
return false;
break;
case NestedNameSpecifier::Namespace:
case NestedNameSpecifier::NamespaceAlias:
// We've already checked that we named the same namespace.
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
if (X->getAsType()->getCanonicalTypeInternal() !=
Y->getAsType()->getCanonicalTypeInternal())
return false;
break;
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
return true;
}
// Recurse into earlier portion of NNS, if any.
auto *PX = X->getPrefix();
auto *PY = Y->getPrefix();
if (PX && PY)
return isSameQualifier(PX, PY);
return !PX && !PY;
}
/// Determine whether the attributes we can overload on are identical for A and
/// B. Will ignore any overloadable attrs represented in the type of A and B.
static bool hasSameOverloadableAttrs(const FunctionDecl *A,
const FunctionDecl *B) {
// Note that pass_object_size attributes are represented in the function's
// ExtParameterInfo, so we don't need to check them here.
llvm::FoldingSetNodeID Cand1ID, Cand2ID;
auto AEnableIfAttrs = A->specific_attrs<EnableIfAttr>();
auto BEnableIfAttrs = B->specific_attrs<EnableIfAttr>();
for (auto Pair : zip_longest(AEnableIfAttrs, BEnableIfAttrs)) {
std::optional<EnableIfAttr *> Cand1A = std::get<0>(Pair);
std::optional<EnableIfAttr *> Cand2A = std::get<1>(Pair);
// Return false if the number of enable_if attributes is different.
if (!Cand1A || !Cand2A)
return false;
Cand1ID.clear();
Cand2ID.clear();
(*Cand1A)->getCond()->Profile(Cand1ID, A->getASTContext(), true);
(*Cand2A)->getCond()->Profile(Cand2ID, B->getASTContext(), true);
// Return false if any of the enable_if expressions of A and B are
// different.
if (Cand1ID != Cand2ID)
return false;
}
return true;
}
bool ASTContext::FriendsDifferByConstraints(const FunctionDecl *X,
const FunctionDecl *Y) const {
// If these aren't friends, then they aren't friends that differ by
// constraints.
if (!X->getFriendObjectKind() || !Y->getFriendObjectKind())
return false;
// If the two functions share a lexical declaration context, they are not in
// separate instantiations, and are thus in the same scope.
if (X->getLexicalDeclContext() == Y->getLexicalDeclContext())
return false;
if (!X->getDescribedFunctionTemplate()) {
assert(!Y->getDescribedFunctionTemplate() &&
"How would these be the same if they aren't both templates?");
// If these friends don't have constraints, they aren't constrained, and
// thus don't fall under temp.friend p9. Otherwise, the mere presence of a
// constraint makes them unique.
return X->getTrailingRequiresClause();
}
return X->FriendConstraintRefersToEnclosingTemplate();
}
bool ASTContext::isSameEntity(const NamedDecl *X, const NamedDecl *Y) const {
if (X == Y)
return true;
if (X->getDeclName() != Y->getDeclName())
return false;
// Must be in the same context.
//
// Note that we can't use DeclContext::Equals here, because the DeclContexts
// could be two different declarations of the same function. (We will fix the
// semantic DC to refer to the primary definition after merging.)
if (!declaresSameEntity(cast<Decl>(X->getDeclContext()->getRedeclContext()),
cast<Decl>(Y->getDeclContext()->getRedeclContext())))
return false;
// Two typedefs refer to the same entity if they have the same underlying
// type.
if (const auto *TypedefX = dyn_cast<TypedefNameDecl>(X))
if (const auto *TypedefY = dyn_cast<TypedefNameDecl>(Y))
return hasSameType(TypedefX->getUnderlyingType(),
TypedefY->getUnderlyingType());
// Must have the same kind.
if (X->getKind() != Y->getKind())
return false;
// Objective-C classes and protocols with the same name always match.
if (isa<ObjCInterfaceDecl>(X) || isa<ObjCProtocolDecl>(X))
return true;
if (isa<ClassTemplateSpecializationDecl>(X)) {
// No need to handle these here: we merge them when adding them to the
// template.
return false;
}
// Compatible tags match.
if (const auto *TagX = dyn_cast<TagDecl>(X)) {
const auto *TagY = cast<TagDecl>(Y);
return (TagX->getTagKind() == TagY->getTagKind()) ||
((TagX->getTagKind() == TTK_Struct ||
TagX->getTagKind() == TTK_Class ||
TagX->getTagKind() == TTK_Interface) &&
(TagY->getTagKind() == TTK_Struct ||
TagY->getTagKind() == TTK_Class ||
TagY->getTagKind() == TTK_Interface));
}
// Functions with the same type and linkage match.
// FIXME: This needs to cope with merging of prototyped/non-prototyped
// functions, etc.
if (const auto *FuncX = dyn_cast<FunctionDecl>(X)) {
const auto *FuncY = cast<FunctionDecl>(Y);
if (const auto *CtorX = dyn_cast<CXXConstructorDecl>(X)) {
const auto *CtorY = cast<CXXConstructorDecl>(Y);
if (CtorX->getInheritedConstructor() &&
!isSameEntity(CtorX->getInheritedConstructor().getConstructor(),
CtorY->getInheritedConstructor().getConstructor()))
return false;
}
if (FuncX->isMultiVersion() != FuncY->isMultiVersion())
return false;
// Multiversioned functions with different feature strings are represented
// as separate declarations.
if (FuncX->isMultiVersion()) {
const auto *TAX = FuncX->getAttr<TargetAttr>();
const auto *TAY = FuncY->getAttr<TargetAttr>();
assert(TAX && TAY && "Multiversion Function without target attribute");
if (TAX->getFeaturesStr() != TAY->getFeaturesStr())
return false;
}
if (!isSameConstraintExpr(FuncX->getTrailingRequiresClause(),
FuncY->getTrailingRequiresClause()))
return false;
// Constrained friends are different in certain cases, see: [temp.friend]p9.
if (FriendsDifferByConstraints(FuncX, FuncY))
return false;
auto GetTypeAsWritten = [](const FunctionDecl *FD) {
// Map to the first declaration that we've already merged into this one.
// The TSI of redeclarations might not match (due to calling conventions
// being inherited onto the type but not the TSI), but the TSI type of
// the first declaration of the function should match across modules.
FD = FD->getCanonicalDecl();
return FD->getTypeSourceInfo() ? FD->getTypeSourceInfo()->getType()
: FD->getType();
};
QualType XT = GetTypeAsWritten(FuncX), YT = GetTypeAsWritten(FuncY);
if (!hasSameType(XT, YT)) {
// We can get functions with different types on the redecl chain in C++17
// if they have differing exception specifications and at least one of
// the exception specs is unresolved.
auto *XFPT = XT->getAs<FunctionProtoType>();
auto *YFPT = YT->getAs<FunctionProtoType>();
if (getLangOpts().CPlusPlus17 && XFPT && YFPT &&
(isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) ||
isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) &&
hasSameFunctionTypeIgnoringExceptionSpec(XT, YT))
return true;
return false;
}
return FuncX->getLinkageInternal() == FuncY->getLinkageInternal() &&
hasSameOverloadableAttrs(FuncX, FuncY);
}
// Variables with the same type and linkage match.
if (const auto *VarX = dyn_cast<VarDecl>(X)) {
const auto *VarY = cast<VarDecl>(Y);
if (VarX->getLinkageInternal() == VarY->getLinkageInternal()) {
if (hasSameType(VarX->getType(), VarY->getType()))
return true;
// We can get decls with different types on the redecl chain, e.g.:
// template <typename T> struct S { static T Var[]; }; // #1
// template <typename T> T S<T>::Var[sizeof(T)]; // #2
// This only happens when completing an incomplete array type, so when
// comparing #1 and #2 we should compare their element types.
const ArrayType *VarXTy = getAsArrayType(VarX->getType());
const ArrayType *VarYTy = getAsArrayType(VarY->getType());
if (!VarXTy || !VarYTy)
return false;
if (VarXTy->isIncompleteArrayType() || VarYTy->isIncompleteArrayType())
return hasSameType(VarXTy->getElementType(), VarYTy->getElementType());
}
return false;
}
// Namespaces with the same name and inlinedness match.
if (const auto *NamespaceX = dyn_cast<NamespaceDecl>(X)) {
const auto *NamespaceY = cast<NamespaceDecl>(Y);
return NamespaceX->isInline() == NamespaceY->isInline();
}
// Identical template names and kinds match if their template parameter lists
// and patterns match.
if (const auto *TemplateX = dyn_cast<TemplateDecl>(X)) {
const auto *TemplateY = cast<TemplateDecl>(Y);
// ConceptDecls aren't the same if their constraint expressions differ.
if (const auto *ConceptX = dyn_cast<ConceptDecl>(X)) {
const auto *ConceptY = cast<ConceptDecl>(Y);
const Expr *XCE = ConceptX->getConstraintExpr();
const Expr *YCE = ConceptY->getConstraintExpr();
assert(XCE && YCE && "ConceptDecl without constraint expression?");
llvm::FoldingSetNodeID XID, YID;
XCE->Profile(XID, *this, /*Canonical=*/true);
YCE->Profile(YID, *this, /*Canonical=*/true);
if (XID != YID)
return false;
}
return isSameEntity(TemplateX->getTemplatedDecl(),
TemplateY->getTemplatedDecl()) &&
isSameTemplateParameterList(TemplateX->getTemplateParameters(),
TemplateY->getTemplateParameters());
}
// Fields with the same name and the same type match.
if (const auto *FDX = dyn_cast<FieldDecl>(X)) {
const auto *FDY = cast<FieldDecl>(Y);
// FIXME: Also check the bitwidth is odr-equivalent, if any.
return hasSameType(FDX->getType(), FDY->getType());
}
// Indirect fields with the same target field match.
if (const auto *IFDX = dyn_cast<IndirectFieldDecl>(X)) {
const auto *IFDY = cast<IndirectFieldDecl>(Y);
return IFDX->getAnonField()->getCanonicalDecl() ==
IFDY->getAnonField()->getCanonicalDecl();
}
// Enumerators with the same name match.
if (isa<EnumConstantDecl>(X))
// FIXME: Also check the value is odr-equivalent.
return true;
// Using shadow declarations with the same target match.
if (const auto *USX = dyn_cast<UsingShadowDecl>(X)) {
const auto *USY = cast<UsingShadowDecl>(Y);
return USX->getTargetDecl() == USY->getTargetDecl();
}
// Using declarations with the same qualifier match. (We already know that
// the name matches.)
if (const auto *UX = dyn_cast<UsingDecl>(X)) {
const auto *UY = cast<UsingDecl>(Y);
return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
UX->hasTypename() == UY->hasTypename() &&
UX->isAccessDeclaration() == UY->isAccessDeclaration();
}
if (const auto *UX = dyn_cast<UnresolvedUsingValueDecl>(X)) {
const auto *UY = cast<UnresolvedUsingValueDecl>(Y);
return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
UX->isAccessDeclaration() == UY->isAccessDeclaration();
}
if (const auto *UX = dyn_cast<UnresolvedUsingTypenameDecl>(X)) {
return isSameQualifier(
UX->getQualifier(),
cast<UnresolvedUsingTypenameDecl>(Y)->getQualifier());
}
// Using-pack declarations are only created by instantiation, and match if
// they're instantiated from matching UnresolvedUsing...Decls.
if (const auto *UX = dyn_cast<UsingPackDecl>(X)) {
return declaresSameEntity(
UX->getInstantiatedFromUsingDecl(),
cast<UsingPackDecl>(Y)->getInstantiatedFromUsingDecl());
}
// Namespace alias definitions with the same target match.
if (const auto *NAX = dyn_cast<NamespaceAliasDecl>(X)) {
const auto *NAY = cast<NamespaceAliasDecl>(Y);
return NAX->getNamespace()->Equals(NAY->getNamespace());
}
return false;
}
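// Editor's note (illustrative): the typedef early-exit above runs before the
// kind check, so e.g. 'typedef unsigned long size_type;' declared in two
// different modules is treated as one entity purely because the underlying
// types compare equal with hasSameType.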
TemplateArgument
ASTContext::getCanonicalTemplateArgument(const TemplateArgument &Arg) const {
switch (Arg.getKind()) {
case TemplateArgument::Null:
return Arg;
case TemplateArgument::Expression:
return Arg;
case TemplateArgument::Declaration: {
auto *D = cast<ValueDecl>(Arg.getAsDecl()->getCanonicalDecl());
return TemplateArgument(D, getCanonicalType(Arg.getParamTypeForDecl()));
}
case TemplateArgument::NullPtr:
return TemplateArgument(getCanonicalType(Arg.getNullPtrType()),
/*isNullPtr*/true);
case TemplateArgument::Template:
return TemplateArgument(getCanonicalTemplateName(Arg.getAsTemplate()));
case TemplateArgument::TemplateExpansion:
return TemplateArgument(getCanonicalTemplateName(
Arg.getAsTemplateOrTemplatePattern()),
Arg.getNumTemplateExpansions());
case TemplateArgument::Integral:
return TemplateArgument(Arg, getCanonicalType(Arg.getIntegralType()));
case TemplateArgument::Type:
return TemplateArgument(getCanonicalType(Arg.getAsType()));
case TemplateArgument::Pack: {
bool AnyNonCanonArgs = false;
auto CanonArgs = ::getCanonicalTemplateArguments(
*this, Arg.pack_elements(), AnyNonCanonArgs);
if (!AnyNonCanonArgs)
return Arg;
return TemplateArgument::CreatePackCopy(const_cast<ASTContext &>(*this),
CanonArgs);
}
}
// Silence GCC warning
llvm_unreachable("Unhandled template argument kind");
}
NestedNameSpecifier *
ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
if (!NNS)
return nullptr;
switch (NNS->getKind()) {
case NestedNameSpecifier::Identifier:
// Canonicalize the prefix but keep the identifier the same.
return NestedNameSpecifier::Create(*this,
getCanonicalNestedNameSpecifier(NNS->getPrefix()),
NNS->getAsIdentifier());
case NestedNameSpecifier::Namespace:
// A namespace is canonical; build a nested-name-specifier with
// this namespace and no prefix.
return NestedNameSpecifier::Create(*this, nullptr,
NNS->getAsNamespace()->getOriginalNamespace());
case NestedNameSpecifier::NamespaceAlias:
// A namespace is canonical; build a nested-name-specifier with
// this namespace and no prefix.
return NestedNameSpecifier::Create(*this, nullptr,
NNS->getAsNamespaceAlias()->getNamespace()
->getOriginalNamespace());
// The difference between TypeSpec and TypeSpecWithTemplate is that the
// latter will have the 'template' keyword when printed.
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
const Type *T = getCanonicalType(NNS->getAsType());
// If we have some kind of dependent-named type (e.g., "typename T::type"),
// break it apart into its prefix and identifier, then reconstitute those
// as the canonical nested-name-specifier. This is required to canonicalize
// a dependent nested-name-specifier involving typedefs of dependent-name
// types, e.g.,
// typedef typename T::type T1;
// typedef typename T1::type T2;
if (const auto *DNT = T->getAs<DependentNameType>())
return NestedNameSpecifier::Create(
*this, DNT->getQualifier(),
const_cast<IdentifierInfo *>(DNT->getIdentifier()));
if (const auto *DTST = T->getAs<DependentTemplateSpecializationType>())
return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true,
const_cast<Type *>(T));
// TODO: Set 'Template' parameter to true for other template types.
return NestedNameSpecifier::Create(*this, nullptr, false,
const_cast<Type *>(T));
}
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
// The global specifier and __super specifier are canonical and unique.
return NNS;
}
llvm_unreachable("Invalid NestedNameSpecifier::Kind!");
}
const ArrayType *ASTContext::getAsArrayType(QualType T) const {
// Handle the non-qualified case efficiently.
if (!T.hasLocalQualifiers()) {
// Handle the common positive case fast.
if (const auto *AT = dyn_cast<ArrayType>(T))
return AT;
}
// Handle the common negative case fast.
if (!isa<ArrayType>(T.getCanonicalType()))
return nullptr;
// Apply any qualifiers from the array type to the element type. This
// implements C99 6.7.3p8: "If the specification of an array type includes
// any type qualifiers, the element type is so qualified, not the array type."
// If we get here, we either have type qualifiers on the type, or we have
// sugar such as a typedef in the way. If we have type qualifiers on the type
// we must propagate them down into the element type.
SplitQualType split = T.getSplitDesugaredType();
Qualifiers qs = split.Quals;
// If we have a simple case, just return now.
const auto *ATy = dyn_cast<ArrayType>(split.Ty);
if (!ATy || qs.empty())
return ATy;
// Otherwise, we have an array and we have qualifiers on it. Push the
// qualifiers into the array element type and return a new array type.
QualType NewEltTy = getQualifiedType(ATy->getElementType(), qs);
if (const auto *CAT = dyn_cast<ConstantArrayType>(ATy))
return cast<ArrayType>(getConstantArrayType(NewEltTy, CAT->getSize(),
CAT->getSizeExpr(),
CAT->getSizeModifier(),
CAT->getIndexTypeCVRQualifiers()));
if (const auto *IAT = dyn_cast<IncompleteArrayType>(ATy))
return cast<ArrayType>(getIncompleteArrayType(NewEltTy,
IAT->getSizeModifier(),
IAT->getIndexTypeCVRQualifiers()));
if (const auto *DSAT = dyn_cast<DependentSizedArrayType>(ATy))
return cast<ArrayType>(
getDependentSizedArrayType(NewEltTy,
DSAT->getSizeExpr(),
DSAT->getSizeModifier(),
DSAT->getIndexTypeCVRQualifiers(),
DSAT->getBracketsRange()));
const auto *VAT = cast<VariableArrayType>(ATy);
return cast<ArrayType>(getVariableArrayType(NewEltTy,
VAT->getSizeExpr(),
VAT->getSizeModifier(),
VAT->getIndexTypeCVRQualifiers(),
VAT->getBracketsRange()));
}
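// Example (editor's illustration; assumes an ASTContext &Ctx and XTy, the
// type of 'x' below): qualifier propagation per C99 6.7.3p8. Given
// 'typedef int A[5]; const A x;', the written type of 'x' is 'const A', and
//
//   const ArrayType *AT = Ctx.getAsArrayType(XTy);
//
// yields 'const int[5]': the 'const' has been pushed onto the element type.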
QualType ASTContext::getAdjustedParameterType(QualType T) const {
if (T->isArrayType() || T->isFunctionType())
return getDecayedType(T);
return T;
}
QualType ASTContext::getSignatureParameterType(QualType T) const {
T = getVariableArrayDecayedType(T);
T = getAdjustedParameterType(T);
return T.getUnqualifiedType();
}
QualType ASTContext::getExceptionObjectType(QualType T) const {
// C++ [except.throw]p3:
// A throw-expression initializes a temporary object, called the exception
// object, the type of which is determined by removing any top-level
// cv-qualifiers from the static type of the operand of throw and adjusting
// the type from "array of T" or "function returning T" to "pointer to T"
// or "pointer to function returning T", [...]
T = getVariableArrayDecayedType(T);
if (T->isArrayType() || T->isFunctionType())
T = getDecayedType(T);
return T.getUnqualifiedType();
}
/// getArrayDecayedType - Return the properly qualified result of decaying the
/// specified array type to a pointer. This operation is non-trivial when
/// handling typedefs etc. The canonical type of "T" must be an array type;
/// this returns a pointer to a properly qualified element of the array.
///
/// See C99 6.7.5.3p7 and C99 6.3.2.1p3.
QualType ASTContext::getArrayDecayedType(QualType Ty) const {
// Get the element type with 'getAsArrayType' so that we don't lose any
// typedefs in the element type of the array. This also handles propagation
// of type qualifiers from the array type into the element type if present
// (C99 6.7.3p8).
const ArrayType *PrettyArrayType = getAsArrayType(Ty);
assert(PrettyArrayType && "Not an array type!");
QualType PtrTy = getPointerType(PrettyArrayType->getElementType());
// int x[restrict 4] -> int *restrict
QualType Result = getQualifiedType(PtrTy,
PrettyArrayType->getIndexTypeQualifiers());
// int x[_Nullable] -> int * _Nullable
if (auto Nullability = Ty->getNullability()) {
Result = const_cast<ASTContext *>(this)->getAttributedType(
AttributedType::getNullabilityAttrKind(*Nullability), Result, Result);
}
return Result;
}
QualType ASTContext::getBaseElementType(const ArrayType *array) const {
return getBaseElementType(array->getElementType());
}
QualType ASTContext::getBaseElementType(QualType type) const {
Qualifiers qs;
while (true) {
SplitQualType split = type.getSplitDesugaredType();
const ArrayType *array = split.Ty->getAsArrayTypeUnsafe();
if (!array) break;
type = array->getElementType();
qs.addConsistentQualifiers(split.Quals);
}
return getQualifiedType(type, qs);
}
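// Example (editor's illustration): getBaseElementType walks through every
// array level and re-applies the accumulated qualifiers, so for
// 'const int[2][3]' it returns 'const int', not 'const int[3]'.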
/// getConstantArrayElementCount - Returns number of constant array elements.
uint64_t
ASTContext::getConstantArrayElementCount(const ConstantArrayType *CA) const {
uint64_t ElementCount = 1;
do {
ElementCount *= CA->getSize().getZExtValue();
CA = dyn_cast_or_null<ConstantArrayType>(
CA->getElementType()->getAsArrayTypeUnsafe());
} while (CA);
return ElementCount;
}
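// Editor's note (worked example): the loop multiplies the bounds of all
// nested constant arrays, so for 'int[2][3]' it returns 2 * 3 = 6, not just
// the outer bound 2.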
uint64_t ASTContext::getArrayInitLoopExprElementCount(
const ArrayInitLoopExpr *AILE) const {
if (!AILE)
return 0;
uint64_t ElementCount = 1;
do {
ElementCount *= AILE->getArraySize().getZExtValue();
AILE = dyn_cast<ArrayInitLoopExpr>(AILE->getSubExpr());
} while (AILE);
return ElementCount;
}
/// getFloatingRank - Return a relative rank for floating point types.
/// This routine will assert if passed a built-in type that isn't a float.
static FloatingRank getFloatingRank(QualType T) {
if (const auto *CT = T->getAs<ComplexType>())
return getFloatingRank(CT->getElementType());
switch (T->castAs<BuiltinType>()->getKind()) {
default: llvm_unreachable("getFloatingRank(): not a floating type");
case BuiltinType::Float16: return Float16Rank;
case BuiltinType::Half: return HalfRank;
case BuiltinType::Float: return FloatRank;
case BuiltinType::Double: return DoubleRank;
case BuiltinType::LongDouble: return LongDoubleRank;
case BuiltinType::Float128: return Float128Rank;
case BuiltinType::BFloat16: return BFloat16Rank;
case BuiltinType::Ibm128: return Ibm128Rank;
}
}
/// getFloatingTypeOrder - Compare the rank of the two specified floating
/// point types, ignoring the domain of the type (i.e. 'double' ==
/// '_Complex double'). If LHS > RHS, return 1. If LHS == RHS, return 0. If
/// LHS < RHS, return -1.
int ASTContext::getFloatingTypeOrder(QualType LHS, QualType RHS) const {
FloatingRank LHSR = getFloatingRank(LHS);
FloatingRank RHSR = getFloatingRank(RHS);
if (LHSR == RHSR)
return 0;
if (LHSR > RHSR)
return 1;
return -1;
}
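// Example (editor's illustration; assumes an ASTContext &Ctx):
//
//   Ctx.getFloatingTypeOrder(Ctx.DoubleTy, Ctx.FloatTy);         // ==  1
//   Ctx.getFloatingTypeOrder(Ctx.DoubleComplexTy, Ctx.DoubleTy); // ==  0
//   Ctx.getFloatingTypeOrder(Ctx.FloatTy, Ctx.LongDoubleTy);     // == -1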
int ASTContext::getFloatingTypeSemanticOrder(QualType LHS, QualType RHS) const {
if (&getFloatTypeSemantics(LHS) == &getFloatTypeSemantics(RHS))
return 0;
return getFloatingTypeOrder(LHS, RHS);
}
/// getIntegerRank - Return an integer conversion rank (C99 6.3.1.1p1). This
/// routine will assert if passed a built-in type that isn't an integer or enum,
/// or if it is not canonicalized.
unsigned ASTContext::getIntegerRank(const Type *T) const {
assert(T->isCanonicalUnqualified() && "T should be canonicalized");
// Results in this 'losing' to any type of the same size, but winning if
// larger.
if (const auto *EIT = dyn_cast<BitIntType>(T))
return 0 + (EIT->getNumBits() << 3);
switch (cast<BuiltinType>(T)->getKind()) {
default: llvm_unreachable("getIntegerRank(): not a built-in integer");
case BuiltinType::Bool:
return 1 + (getIntWidth(BoolTy) << 3);
case BuiltinType::Char_S:
case BuiltinType::Char_U:
case BuiltinType::SChar:
case BuiltinType::UChar:
return 2 + (getIntWidth(CharTy) << 3);
case BuiltinType::Short:
case BuiltinType::UShort:
return 3 + (getIntWidth(ShortTy) << 3);
case BuiltinType::Int:
case BuiltinType::UInt:
return 4 + (getIntWidth(IntTy) << 3);
case BuiltinType::Long:
case BuiltinType::ULong:
return 5 + (getIntWidth(LongTy) << 3);
case BuiltinType::LongLong:
case BuiltinType::ULongLong:
return 6 + (getIntWidth(LongLongTy) << 3);
case BuiltinType::Int128:
case BuiltinType::UInt128:
return 7 + (getIntWidth(Int128Ty) << 3);
// "The ranks of char8_t, char16_t, char32_t, and wchar_t equal the ranks of
// their underlying types" [c++20 conv.rank]
case BuiltinType::Char8:
return getIntegerRank(UnsignedCharTy.getTypePtr());
case BuiltinType::Char16:
return getIntegerRank(
getFromTargetType(Target->getChar16Type()).getTypePtr());
case BuiltinType::Char32:
return getIntegerRank(
getFromTargetType(Target->getChar32Type()).getTypePtr());
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
return getIntegerRank(
getFromTargetType(Target->getWCharType()).getTypePtr());
}
}
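// Editor's note (worked example): the rank packs the bit-width into the
// high bits and a tie-break ordinal into the low three bits. On a target
// with 32-bit int and 64-bit long:
//
//   int         -> 4 + (32 << 3) = 260
//   long        -> 5 + (64 << 3) = 517
//   _BitInt(32) -> 0 + (32 << 3) = 256 // loses to int of the same width
//   _BitInt(40) -> 0 + (40 << 3) = 320 // beats int, loses to long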
/// Whether this is a promotable bitfield reference according
/// to C99 6.3.1.1p2, bullet 2 (and GCC extensions).
///
/// \returns the type this bit-field will promote to, or NULL if no
/// promotion occurs.
QualType ASTContext::isPromotableBitField(Expr *E) const {
if (E->isTypeDependent() || E->isValueDependent())
return {};
// C++ [conv.prom]p5:
// If the bit-field has an enumerated type, it is treated as any other
// value of that type for promotion purposes.
if (getLangOpts().CPlusPlus && E->getType()->isEnumeralType())
return {};
// FIXME: We should not do this unless E->refersToBitField() is true. This
// matters in C where getSourceBitField() will find bit-fields for various
// cases where the source expression is not a bit-field designator.
FieldDecl *Field = E->getSourceBitField(); // FIXME: conditional bit-fields?
if (!Field)
return {};
QualType FT = Field->getType();
uint64_t BitWidth = Field->getBitWidthValue(*this);
uint64_t IntSize = getTypeSize(IntTy);
// C++ [conv.prom]p5:
// A prvalue for an integral bit-field can be converted to a prvalue of type
// int if int can represent all the values of the bit-field; otherwise, it
// can be converted to unsigned int if unsigned int can represent all the
// values of the bit-field. If the bit-field is larger yet, no integral
// promotion applies to it.
// C11 6.3.1.1/2:
// [For a bit-field of type _Bool, int, signed int, or unsigned int:]
// If an int can represent all values of the original type (as restricted by
// the width, for a bit-field), the value is converted to an int; otherwise,
// it is converted to an unsigned int.
//
// FIXME: C does not permit promotion of a 'long : 3' bitfield to int.
// We perform that promotion here to match GCC and C++.
// FIXME: C does not permit promotion of an enum bit-field whose rank is
// greater than that of 'int'. We perform that promotion to match GCC.
if (BitWidth < IntSize)
return IntTy;
if (BitWidth == IntSize)
return FT->isSignedIntegerType() ? IntTy : UnsignedIntTy;
// Bit-fields wider than int are not subject to promotions, and therefore act
// like the base type. GCC has some weird bugs in this area that we
// deliberately do not follow (GCC follows a pre-standard resolution to
// C's DR315 which treats bit-width as being part of the type, and this leaks
// into their semantics in some cases).
return {};
}
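// Example (editor's illustration): with a 32-bit int, given
//
//   struct S { unsigned a : 8; unsigned b : 32; long c : 3; } s;
//
// 's.a' promotes to 'int' (8 < 32, so int holds all its values), 's.b'
// promotes to 'unsigned int' (width equals int's and the field is
// unsigned), and 's.c' also promotes to 'int' here, matching GCC and C++
// (see the FIXME above).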
/// getPromotedIntegerType - Returns the type that Promotable will
/// promote to: C99 6.3.1.1p2, assuming that Promotable is a promotable
/// integer type.
QualType ASTContext::getPromotedIntegerType(QualType Promotable) const {
assert(!Promotable.isNull());
assert(isPromotableIntegerType(Promotable));
if (const auto *ET = Promotable->getAs<EnumType>())
return ET->getDecl()->getPromotionType();
if (const auto *BT = Promotable->getAs<BuiltinType>()) {
// C++ [conv.prom]: A prvalue of type char16_t, char32_t, or wchar_t
// (3.9.1) can be converted to a prvalue of the first of the following
// types that can represent all the values of its underlying type:
// int, unsigned int, long int, unsigned long int, long long int, or
// unsigned long long int [...]
// FIXME: Is there some better way to compute this?
if (BT->getKind() == BuiltinType::WChar_S ||
BT->getKind() == BuiltinType::WChar_U ||
BT->getKind() == BuiltinType::Char8 ||
BT->getKind() == BuiltinType::Char16 ||
BT->getKind() == BuiltinType::Char32) {
bool FromIsSigned = BT->getKind() == BuiltinType::WChar_S;
uint64_t FromSize = getTypeSize(BT);
QualType PromoteTypes[] = { IntTy, UnsignedIntTy, LongTy, UnsignedLongTy,
LongLongTy, UnsignedLongLongTy };
for (const auto &PT : PromoteTypes) {
uint64_t ToSize = getTypeSize(PT);
if (FromSize < ToSize ||
(FromSize == ToSize && FromIsSigned == PT->isSignedIntegerType()))
return PT;
}
llvm_unreachable("char type should fit into long long");
}
}
// At this point, we should have a signed or unsigned integer type.
if (Promotable->isSignedIntegerType())
return IntTy;
uint64_t PromotableSize = getIntWidth(Promotable);
uint64_t IntSize = getIntWidth(IntTy);
assert(Promotable->isUnsignedIntegerType() && PromotableSize <= IntSize);
return (PromotableSize != IntSize) ? IntTy : UnsignedIntTy;
}
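// Example (editor's illustration): 'short' promotes to 'int' via the signed
// path, and 'unsigned short' also promotes to 'int' because its width (16)
// differs from int's (32). For 'char16_t', the loop above returns the first
// of int/unsigned int/... that can represent all of its values -- 'int' on
// any target where int is wider than 16 bits.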
/// Recurses in pointer/array types until it finds an objc retainable
/// type and returns its ownership.
Qualifiers::ObjCLifetime ASTContext::getInnerObjCOwnership(QualType T) const {
while (!T.isNull()) {
if (T.getObjCLifetime() != Qualifiers::OCL_None)
return T.getObjCLifetime();
if (T->isArrayType())
T = getBaseElementType(T);
else if (const auto *PT = T->getAs<PointerType>())
T = PT->getPointeeType();
else if (const auto *RT = T->getAs<ReferenceType>())
T = RT->getPointeeType();
else
break;
}
return Qualifiers::OCL_None;
}
static const Type *getIntegerTypeForEnum(const EnumType *ET) {
// Incomplete enum types are not treated as integer types.
// FIXME: In C++, enum types are never integer types.
if (ET->getDecl()->isComplete() && !ET->getDecl()->isScoped())
return ET->getDecl()->getIntegerType().getTypePtr();
return nullptr;
}
/// getIntegerTypeOrder - Returns the highest ranked integer type:
/// C99 6.3.1.8p1. If LHS > RHS, return 1. If LHS == RHS, return 0. If
/// LHS < RHS, return -1.
int ASTContext::getIntegerTypeOrder(QualType LHS, QualType RHS) const {
const Type *LHSC = getCanonicalType(LHS).getTypePtr();
const Type *RHSC = getCanonicalType(RHS).getTypePtr();
// Unwrap enums to their underlying type.
if (const auto *ET = dyn_cast<EnumType>(LHSC))
LHSC = getIntegerTypeForEnum(ET);
if (const auto *ET = dyn_cast<EnumType>(RHSC))
RHSC = getIntegerTypeForEnum(ET);
if (LHSC == RHSC) return 0;
bool LHSUnsigned = LHSC->isUnsignedIntegerType();
bool RHSUnsigned = RHSC->isUnsignedIntegerType();
unsigned LHSRank = getIntegerRank(LHSC);
unsigned RHSRank = getIntegerRank(RHSC);
if (LHSUnsigned == RHSUnsigned) { // Both signed or both unsigned.
if (LHSRank == RHSRank) return 0;
return LHSRank > RHSRank ? 1 : -1;
}
// Otherwise, the LHS is signed and the RHS is unsigned or vice versa.
if (LHSUnsigned) {
// If the unsigned [LHS] type is larger, return it.
if (LHSRank >= RHSRank)
return 1;
// If the signed type can represent all values of the unsigned type, it
// wins. Because we are dealing with 2's complement and types that are
// powers of two larger than each other, this is always safe.
return -1;
}
// If the unsigned [RHS] type is larger, return it.
if (RHSRank >= LHSRank)
return -1;
// If the signed type can represent all values of the unsigned type, it
// wins. Because we are dealing with 2's complement and types that are
// powers of two larger than each other, this is always safe.
return 1;
}
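// Editor's note (worked example): for 'int' vs 'unsigned int' the ranks
// tie, the LHS is signed and the RHS unsigned, so the last branch returns
// -1: 'unsigned int' is the higher-ordered type, matching the usual
// arithmetic conversions. For 'long' vs 'unsigned int' on LP64, the signed
// 'long' has the greater rank and can represent every 'unsigned int'
// value, so the result is 1.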
TypedefDecl *ASTContext::getCFConstantStringDecl() const {
if (CFConstantStringTypeDecl)
return CFConstantStringTypeDecl;
assert(!CFConstantStringTagDecl &&
"tag and typedef should be initialized together");
CFConstantStringTagDecl = buildImplicitRecord("__NSConstantString_tag");
CFConstantStringTagDecl->startDefinition();
struct {
QualType Type;
const char *Name;
} Fields[5];
unsigned Count = 0;
/// Objective-C ABI
///
/// typedef struct __NSConstantString_tag {
/// const int *isa;
/// int flags;
/// const char *str;
/// long length;
/// } __NSConstantString;
///
/// Swift ABI (4.1, 4.2)
///
/// typedef struct __NSConstantString_tag {
/// uintptr_t _cfisa;
/// uintptr_t _swift_rc;
/// _Atomic(uint64_t) _cfinfoa;
/// const char *_ptr;
/// uint32_t _length;
/// } __NSConstantString;
///
/// Swift ABI (5.0)
///
/// typedef struct __NSConstantString_tag {
/// uintptr_t _cfisa;
/// uintptr_t _swift_rc;
/// _Atomic(uint64_t) _cfinfoa;
/// const char *_ptr;
/// uintptr_t _length;
/// } __NSConstantString;
const auto CFRuntime = getLangOpts().CFRuntime;
if (static_cast<unsigned>(CFRuntime) <
static_cast<unsigned>(LangOptions::CoreFoundationABI::Swift)) {
Fields[Count++] = { getPointerType(IntTy.withConst()), "isa" };
Fields[Count++] = { IntTy, "flags" };
Fields[Count++] = { getPointerType(CharTy.withConst()), "str" };
Fields[Count++] = { LongTy, "length" };
} else {
Fields[Count++] = { getUIntPtrType(), "_cfisa" };
Fields[Count++] = { getUIntPtrType(), "_swift_rc" };
Fields[Count++] = { getFromTargetType(Target->getUInt64Type()), "_cfinfoa" };
Fields[Count++] = { getPointerType(CharTy.withConst()), "_ptr" };
if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 ||
CFRuntime == LangOptions::CoreFoundationABI::Swift4_2)
Fields[Count++] = { IntTy, "_length" };
else
Fields[Count++] = { getUIntPtrType(), "_length" };
}
// Create fields
for (unsigned i = 0; i < Count; ++i) {
FieldDecl *Field =
FieldDecl::Create(*this, CFConstantStringTagDecl, SourceLocation(),
SourceLocation(), &Idents.get(Fields[i].Name),
Fields[i].Type, /*TInfo=*/nullptr,
/*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
CFConstantStringTagDecl->addDecl(Field);
}
CFConstantStringTagDecl->completeDefinition();
// This type is designed to be compatible with NSConstantString, but cannot
// use the same name, since NSConstantString is an interface.
auto tagType = getTagDeclType(CFConstantStringTagDecl);
CFConstantStringTypeDecl =
buildImplicitTypedef(tagType, "__NSConstantString");
return CFConstantStringTypeDecl;
}
RecordDecl *ASTContext::getCFConstantStringTagDecl() const {
if (!CFConstantStringTagDecl)
getCFConstantStringDecl(); // Build the tag and the typedef.
return CFConstantStringTagDecl;
}
// getCFConstantStringType - Return the type used for constant CFStrings.
QualType ASTContext::getCFConstantStringType() const {
return getTypedefType(getCFConstantStringDecl());
}
QualType ASTContext::getObjCSuperType() const {
if (ObjCSuperType.isNull()) {
RecordDecl *ObjCSuperTypeDecl = buildImplicitRecord("objc_super");
getTranslationUnitDecl()->addDecl(ObjCSuperTypeDecl);
ObjCSuperType = getTagDeclType(ObjCSuperTypeDecl);
}
return ObjCSuperType;
}
void ASTContext::setCFConstantStringType(QualType T) {
const auto *TD = T->castAs<TypedefType>();
CFConstantStringTypeDecl = cast<TypedefDecl>(TD->getDecl());
const auto *TagType =
CFConstantStringTypeDecl->getUnderlyingType()->castAs<RecordType>();
CFConstantStringTagDecl = TagType->getDecl();
}
QualType ASTContext::getBlockDescriptorType() const {
if (BlockDescriptorType)
return getTagDeclType(BlockDescriptorType);
RecordDecl *RD;
// FIXME: Needs the FlagAppleBlock bit.
RD = buildImplicitRecord("__block_descriptor");
RD->startDefinition();
QualType FieldTypes[] = {
UnsignedLongTy,
UnsignedLongTy,
};
static const char *const FieldNames[] = {
"reserved",
"Size"
};
for (size_t i = 0; i < 2; ++i) {
FieldDecl *Field = FieldDecl::Create(
*this, RD, SourceLocation(), SourceLocation(),
&Idents.get(FieldNames[i]), FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
RD->addDecl(Field);
}
RD->completeDefinition();
BlockDescriptorType = RD;
return getTagDeclType(BlockDescriptorType);
}
QualType ASTContext::getBlockDescriptorExtendedType() const {
if (BlockDescriptorExtendedType)
return getTagDeclType(BlockDescriptorExtendedType);
RecordDecl *RD;
// FIXME: Needs the FlagAppleBlock bit.
RD = buildImplicitRecord("__block_descriptor_withcopydispose");
RD->startDefinition();
QualType FieldTypes[] = {
UnsignedLongTy,
UnsignedLongTy,
getPointerType(VoidPtrTy),
getPointerType(VoidPtrTy)
};
static const char *const FieldNames[] = {
"reserved",
"Size",
"CopyFuncPtr",
"DestroyFuncPtr"
};
for (size_t i = 0; i < 4; ++i) {
FieldDecl *Field = FieldDecl::Create(
*this, RD, SourceLocation(), SourceLocation(),
&Idents.get(FieldNames[i]), FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
RD->addDecl(Field);
}
RD->completeDefinition();
BlockDescriptorExtendedType = RD;
return getTagDeclType(BlockDescriptorExtendedType);
}
OpenCLTypeKind ASTContext::getOpenCLTypeKind(const Type *T) const {
const auto *BT = dyn_cast<BuiltinType>(T);
if (!BT) {
if (isa<PipeType>(T))
return OCLTK_Pipe;
return OCLTK_Default;
}
switch (BT->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
return OCLTK_Image;
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLClkEvent:
return OCLTK_ClkEvent;
case BuiltinType::OCLEvent:
return OCLTK_Event;
case BuiltinType::OCLQueue:
return OCLTK_Queue;
case BuiltinType::OCLReserveID:
return OCLTK_ReserveID;
case BuiltinType::OCLSampler:
return OCLTK_Sampler;
default:
return OCLTK_Default;
}
}
LangAS ASTContext::getOpenCLTypeAddrSpace(const Type *T) const {
return Target->getOpenCLTypeAddrSpace(getOpenCLTypeKind(T));
}
/// BlockRequiresCopying - Returns true if byref variable "D" of type "Ty"
/// requires copy/dispose. Note that this must match the logic
/// in buildByrefHelpers.
bool ASTContext::BlockRequiresCopying(QualType Ty,
const VarDecl *D) {
if (const CXXRecordDecl *record = Ty->getAsCXXRecordDecl()) {
const Expr *copyExpr = getBlockVarCopyInit(D).getCopyExpr();
if (!copyExpr && record->hasTrivialDestructor()) return false;
return true;
}
// The block needs copy/destroy helpers if Ty is non-trivial to destructively
// move or destroy.
if (Ty.isNonTrivialToPrimitiveDestructiveMove() || Ty.isDestructedType())
return true;
if (!Ty->isObjCRetainableType()) return false;
Qualifiers qs = Ty.getQualifiers();
// If we have lifetime, that dominates.
if (Qualifiers::ObjCLifetime lifetime = qs.getObjCLifetime()) {
switch (lifetime) {
case Qualifiers::OCL_None: llvm_unreachable("impossible");
// These are just bits as far as the runtime is concerned.
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Autoreleasing:
return false;
// These cases should have been taken care of when checking the type's
// non-triviality.
case Qualifiers::OCL_Weak:
case Qualifiers::OCL_Strong:
llvm_unreachable("impossible");
}
llvm_unreachable("fell out of lifetime switch!");
}
return (Ty->isBlockPointerType() || isObjCNSObjectType(Ty) ||
Ty->isObjCObjectPointerType());
}
bool ASTContext::getByrefLifetime(QualType Ty,
Qualifiers::ObjCLifetime &LifeTime,
bool &HasByrefExtendedLayout) const {
if (!getLangOpts().ObjC ||
getLangOpts().getGC() != LangOptions::NonGC)
return false;
HasByrefExtendedLayout = false;
if (Ty->isRecordType()) {
HasByrefExtendedLayout = true;
LifeTime = Qualifiers::OCL_None;
} else if ((LifeTime = Ty.getObjCLifetime())) {
// Honor the ARC qualifiers.
} else if (Ty->isObjCObjectPointerType() || Ty->isBlockPointerType()) {
// The MRR rule.
LifeTime = Qualifiers::OCL_ExplicitNone;
} else {
LifeTime = Qualifiers::OCL_None;
}
return true;
}
CanQualType ASTContext::getNSUIntegerType() const {
assert(Target && "Expected target to be initialized");
const llvm::Triple &T = Target->getTriple();
// Windows is LLP64 rather than LP64
if (T.isOSWindows() && T.isArch64Bit())
return UnsignedLongLongTy;
return UnsignedLongTy;
}
CanQualType ASTContext::getNSIntegerType() const {
assert(Target && "Expected target to be initialized");
const llvm::Triple &T = Target->getTriple();
// Windows is LLP64 rather than LP64
if (T.isOSWindows() && T.isArch64Bit())
return LongLongTy;
return LongTy;
}
TypedefDecl *ASTContext::getObjCInstanceTypeDecl() {
if (!ObjCInstanceTypeDecl)
ObjCInstanceTypeDecl =
buildImplicitTypedef(getObjCIdType(), "instancetype");
return ObjCInstanceTypeDecl;
}
// This returns true if a type has been typedefed to BOOL:
// typedef <type> BOOL;
static bool isTypeTypedefedAsBOOL(QualType T) {
if (const auto *TT = dyn_cast<TypedefType>(T))
if (IdentifierInfo *II = TT->getDecl()->getIdentifier())
return II->isStr("BOOL");
return false;
}
/// getObjCEncodingTypeSize - Returns the size of the type for Objective-C
/// encoding purposes.
CharUnits ASTContext::getObjCEncodingTypeSize(QualType type) const {
if (!type->isIncompleteArrayType() && type->isIncompleteType())
return CharUnits::Zero();
CharUnits sz = getTypeSizeInChars(type);
// Make all integer and enum types at least as large as an int
if (sz.isPositive() && type->isIntegralOrEnumerationType())
sz = std::max(sz, getTypeSizeInChars(IntTy));
// Treat arrays as pointers, since that's how they're passed in.
else if (type->isArrayType())
sz = getTypeSizeInChars(VoidPtrTy);
return sz;
}
bool ASTContext::isMSStaticDataMemberInlineDefinition(const VarDecl *VD) const {
return getTargetInfo().getCXXABI().isMicrosoft() &&
VD->isStaticDataMember() &&
VD->getType()->isIntegralOrEnumerationType() &&
!VD->getFirstDecl()->isOutOfLine() && VD->getFirstDecl()->hasInit();
}
ASTContext::InlineVariableDefinitionKind
ASTContext::getInlineVariableDefinitionKind(const VarDecl *VD) const {
if (!VD->isInline())
return InlineVariableDefinitionKind::None;
// In almost all cases, it's a weak definition.
auto *First = VD->getFirstDecl();
if (First->isInlineSpecified() || !First->isStaticDataMember())
return InlineVariableDefinitionKind::Weak;
// If there's a file-context declaration in this translation unit, it's a
// non-discardable definition.
for (auto *D : VD->redecls())
if (D->getLexicalDeclContext()->isFileContext() &&
!D->isInlineSpecified() && (D->isConstexpr() || First->isConstexpr()))
return InlineVariableDefinitionKind::Strong;
// If we've not seen one yet, we don't know.
return InlineVariableDefinitionKind::WeakUnknown;
}
static std::string charUnitsToString(const CharUnits &CU) {
return llvm::itostr(CU.getQuantity());
}
/// getObjCEncodingForBlock - Return the encoded type for this block
/// declaration.
std::string ASTContext::getObjCEncodingForBlock(const BlockExpr *Expr) const {
std::string S;
const BlockDecl *Decl = Expr->getBlockDecl();
QualType BlockTy =
Expr->getType()->castAs<BlockPointerType>()->getPointeeType();
QualType BlockReturnTy = BlockTy->castAs<FunctionType>()->getReturnType();
// Encode result type.
if (getLangOpts().EncodeExtendedBlockSig)
getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, BlockReturnTy, S,
true /*Extended*/);
else
getObjCEncodingForType(BlockReturnTy, S);
// Compute size of all parameters.
// Start with computing size of a pointer in number of bytes.
// FIXME: There might (and should) be a better way of doing this computation!
CharUnits PtrSize = getTypeSizeInChars(VoidPtrTy);
CharUnits ParmOffset = PtrSize;
for (auto *PI : Decl->parameters()) {
QualType PType = PI->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() && "BlockExpr - Incomplete param type");
ParmOffset += sz;
}
// Size of the argument frame
S += charUnitsToString(ParmOffset);
// Block pointer and offset.
S += "@?0";
// Argument types.
ParmOffset = PtrSize;
for (auto *PVDecl : Decl->parameters()) {
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
if (getLangOpts().EncodeExtendedBlockSig)
getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, PType,
S, true /*Extended*/);
else
getObjCEncodingForType(PType, S);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
std::string
ASTContext::getObjCEncodingForFunctionDecl(const FunctionDecl *Decl) const {
std::string S;
// Encode result type.
getObjCEncodingForType(Decl->getReturnType(), S);
CharUnits ParmOffset;
// Compute size of all parameters.
for (auto *PI : Decl->parameters()) {
QualType PType = PI->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() &&
"getObjCEncodingForFunctionDecl - Incomplete param type");
ParmOffset += sz;
}
S += charUnitsToString(ParmOffset);
ParmOffset = CharUnits::Zero();
// Argument types.
for (auto *PVDecl : Decl->parameters()) {
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
getObjCEncodingForType(PType, S);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
/// getObjCEncodingForMethodParameter - Return the encoded type for a single
/// method parameter or return type. If Extended, include class names and
/// block object types.
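///
/// As a minimal illustration (derived from getObjCEncodingForTypeQualifier and
/// getObjCEncodingForPrimitiveType below, not from a runtime dump): an 'inout'
/// parameter of type 'int' is encoded as "Ni" -- 'N' for the inout qualifier
/// followed by 'i' for int.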
void ASTContext::getObjCEncodingForMethodParameter(Decl::ObjCDeclQualifier QT,
QualType T, std::string& S,
bool Extended) const {
// Encode type qualifier, 'in', 'inout', etc. for the parameter.
getObjCEncodingForTypeQualifier(QT, S);
// Encode parameter type.
ObjCEncOptions Options = ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType();
if (Extended)
Options.setEncodeBlockParameters().setEncodeClassNames();
getObjCEncodingForTypeImpl(T, S, Options, /*Field=*/nullptr);
}
/// getObjCEncodingForMethodDecl - Return the encoded type for this method
/// declaration.
std::string ASTContext::getObjCEncodingForMethodDecl(const ObjCMethodDecl *Decl,
bool Extended) const {
// FIXME: This is not very efficient.
// Encode return type.
std::string S;
getObjCEncodingForMethodParameter(Decl->getObjCDeclQualifier(),
Decl->getReturnType(), S, Extended);
// Compute size of all parameters.
// Start with computing size of a pointer in number of bytes.
// FIXME: There might (and should) be a better way of doing this computation!
CharUnits PtrSize = getTypeSizeInChars(VoidPtrTy);
// The first two arguments (self and _cmd) are pointers; account for
// their size.
CharUnits ParmOffset = 2 * PtrSize;
for (ObjCMethodDecl::param_const_iterator PI = Decl->param_begin(),
E = Decl->sel_param_end(); PI != E; ++PI) {
QualType PType = (*PI)->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() &&
"getObjCEncodingForMethodDecl - Incomplete param type");
ParmOffset += sz;
}
S += charUnitsToString(ParmOffset);
S += "@0:";
S += charUnitsToString(PtrSize);
// Argument types.
ParmOffset = 2 * PtrSize;
for (ObjCMethodDecl::param_const_iterator PI = Decl->param_begin(),
E = Decl->sel_param_end(); PI != E; ++PI) {
const ParmVarDecl *PVDecl = *PI;
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
getObjCEncodingForMethodParameter(PVDecl->getObjCDeclQualifier(),
PType, S, Extended);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
ObjCPropertyImplDecl *
ASTContext::getObjCPropertyImplDeclForPropertyDecl(
const ObjCPropertyDecl *PD,
const Decl *Container) const {
if (!Container)
return nullptr;
if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(Container)) {
for (auto *PID : CID->property_impls())
if (PID->getPropertyDecl() == PD)
return PID;
} else {
const auto *OID = cast<ObjCImplementationDecl>(Container);
for (auto *PID : OID->property_impls())
if (PID->getPropertyDecl() == PD)
return PID;
}
return nullptr;
}
/// getObjCEncodingForPropertyDecl - Return the encoded type for this
/// property declaration. If non-NULL, Container must be either an
/// ObjCCategoryImplDecl or ObjCImplementationDecl; it should only be
/// NULL when getting encodings for protocol properties.
/// Property attributes are stored as a comma-delimited C string. The simple
/// attributes readonly and bycopy are encoded as single characters. The
/// parametrized attributes, getter=name, setter=name, and ivar=name, are
/// encoded as single characters, followed by an identifier. Property types
/// are also encoded as a parametrized attribute. The characters used to encode
/// these attributes are defined by the following enumeration:
/// @code
/// enum PropertyAttributes {
/// kPropertyReadOnly = 'R', // property is read-only.
/// kPropertyBycopy = 'C', // property is a copy of the value last assigned
/// kPropertyByref = '&', // property is a reference to the value last assigned
/// kPropertyDynamic = 'D', // property is dynamic
/// kPropertyGetter = 'G', // followed by getter selector name
/// kPropertySetter = 'S', // followed by setter selector name
/// kPropertyInstanceVariable = 'V', // followed by instance variable name
/// kPropertyType = 'T', // followed by old-style type encoding.
/// kPropertyWeak = 'W', // 'weak' property
/// kPropertyStrong = 'P', // property GC'able
/// kPropertyNonAtomic = 'N' // property non-atomic
/// };
/// @endcode
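///
/// As an illustrative sketch (assuming a synthesized backing ivar named
/// '_name'), a property declared as
/// @code
/// @property (nonatomic, copy) NSString *name;
/// @endcode
/// would be encoded roughly as "T@"NSString",C,N,V_name": the 'T' type
/// encoding, ',C' for copy, ',N' for nonatomic, and ',V' plus the ivar name.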
std::string
ASTContext::getObjCEncodingForPropertyDecl(const ObjCPropertyDecl *PD,
const Decl *Container) const {
// Collect information from the property implementation decl(s).
bool Dynamic = false;
ObjCPropertyImplDecl *SynthesizePID = nullptr;
if (ObjCPropertyImplDecl *PropertyImpDecl =
getObjCPropertyImplDeclForPropertyDecl(PD, Container)) {
if (PropertyImpDecl->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic)
Dynamic = true;
else
SynthesizePID = PropertyImpDecl;
}
// FIXME: This is not very efficient.
std::string S = "T";
// Encode result type.
// GCC has some special rules regarding encoding of properties which
// closely resembles encoding of ivars.
getObjCEncodingForPropertyType(PD->getType(), S);
if (PD->isReadOnly()) {
S += ",R";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_copy)
S += ",C";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_retain)
S += ",&";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_weak)
S += ",W";
} else {
switch (PD->getSetterKind()) {
case ObjCPropertyDecl::Assign: break;
case ObjCPropertyDecl::Copy: S += ",C"; break;
case ObjCPropertyDecl::Retain: S += ",&"; break;
case ObjCPropertyDecl::Weak: S += ",W"; break;
}
}
// It really isn't clear at all what this means, since properties
// are "dynamic by default".
if (Dynamic)
S += ",D";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_nonatomic)
S += ",N";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_getter) {
S += ",G";
S += PD->getGetterName().getAsString();
}
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_setter) {
S += ",S";
S += PD->getSetterName().getAsString();
}
if (SynthesizePID) {
const ObjCIvarDecl *OID = SynthesizePID->getPropertyIvarDecl();
S += ",V";
S += OID->getNameAsString();
}
// FIXME: OBJCGC: weak & strong
return S;
}
/// getLegacyIntegralTypeEncoding -
/// Another legacy compatibility encoding: 32-bit longs are encoded as
/// 'l' or 'L', but not always. For typedefs, we need to use
/// 'i' or 'I' instead when encoding a struct field or a pointer!
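///
/// A minimal illustration (hypothetical declaration, not from the original
/// source): on a target where 'long' is 32 bits, a struct field declared
/// through a typedef of 'long' is encoded as 'i' rather than 'l', matching
/// the legacy GCC behaviour this function preserves.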
void ASTContext::getLegacyIntegralTypeEncoding (QualType &PointeeTy) const {
if (PointeeTy->getAs<TypedefType>()) {
if (const auto *BT = PointeeTy->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::ULong && getIntWidth(PointeeTy) == 32)
PointeeTy = UnsignedIntTy;
else
if (BT->getKind() == BuiltinType::Long && getIntWidth(PointeeTy) == 32)
PointeeTy = IntTy;
}
}
}
void ASTContext::getObjCEncodingForType(QualType T, std::string& S,
const FieldDecl *Field,
QualType *NotEncodedT) const {
// We follow the behavior of gcc, expanding structures which are
// directly pointed to, and expanding embedded structures. Note that
// these rules are sufficient to prevent recursive encoding of the
// same type.
getObjCEncodingForTypeImpl(T, S,
ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType(),
Field, NotEncodedT);
}
void ASTContext::getObjCEncodingForPropertyType(QualType T,
std::string& S) const {
// Encode result type.
// GCC has some special rules regarding encoding of properties which
// closely resembles encoding of ivars.
getObjCEncodingForTypeImpl(T, S,
ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType()
.setEncodingProperty(),
/*Field=*/nullptr);
}
static char getObjCEncodingForPrimitiveType(const ASTContext *C,
const BuiltinType *BT) {
BuiltinType::Kind kind = BT->getKind();
switch (kind) {
case BuiltinType::Void: return 'v';
case BuiltinType::Bool: return 'B';
case BuiltinType::Char8:
case BuiltinType::Char_U:
case BuiltinType::UChar: return 'C';
case BuiltinType::Char16:
case BuiltinType::UShort: return 'S';
case BuiltinType::Char32:
case BuiltinType::UInt: return 'I';
case BuiltinType::ULong:
return C->getTargetInfo().getLongWidth() == 32 ? 'L' : 'Q';
case BuiltinType::UInt128: return 'T';
case BuiltinType::ULongLong: return 'Q';
case BuiltinType::Char_S:
case BuiltinType::SChar: return 'c';
case BuiltinType::Short: return 's';
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
case BuiltinType::Int: return 'i';
case BuiltinType::Long:
return C->getTargetInfo().getLongWidth() == 32 ? 'l' : 'q';
case BuiltinType::LongLong: return 'q';
case BuiltinType::Int128: return 't';
case BuiltinType::Float: return 'f';
case BuiltinType::Double: return 'd';
case BuiltinType::LongDouble: return 'D';
case BuiltinType::NullPtr: return '*'; // like char*
case BuiltinType::BFloat16:
case BuiltinType::Float16:
case BuiltinType::Float128:
case BuiltinType::Ibm128:
case BuiltinType::Half:
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
case BuiltinType::LongAccum:
case BuiltinType::UShortAccum:
case BuiltinType::UAccum:
case BuiltinType::ULongAccum:
case BuiltinType::ShortFract:
case BuiltinType::Fract:
case BuiltinType::LongFract:
case BuiltinType::UShortFract:
case BuiltinType::UFract:
case BuiltinType::ULongFract:
case BuiltinType::SatShortAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatLongAccum:
case BuiltinType::SatUShortAccum:
case BuiltinType::SatUAccum:
case BuiltinType::SatULongAccum:
case BuiltinType::SatShortFract:
case BuiltinType::SatFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatUShortFract:
case BuiltinType::SatUFract:
case BuiltinType::SatULongFract:
// FIXME: potentially need @encodes for these!
return ' ';
#define SVE_TYPE(Name, Id, SingletonId) \
case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/RISCVVTypes.def"
{
DiagnosticsEngine &Diags = C->getDiagnostics();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot yet @encode type %0");
Diags.Report(DiagID) << BT->getName(C->getPrintingPolicy());
return ' ';
}
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
llvm_unreachable("@encoding ObjC primitive type");
// OpenCL and placeholder types don't need @encodings.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
case BuiltinType::OCLSampler:
case BuiltinType::Dependent:
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id:
#include "clang/Basic/PPCTypes.def"
#define BUILTIN_TYPE(KIND, ID)
#define PLACEHOLDER_TYPE(KIND, ID) \
case BuiltinType::KIND:
#include "clang/AST/BuiltinTypes.def"
llvm_unreachable("invalid builtin type for @encode");
}
llvm_unreachable("invalid BuiltinType::Kind value");
}
static char ObjCEncodingForEnumType(const ASTContext *C, const EnumType *ET) {
EnumDecl *Enum = ET->getDecl();
// The encoding of a non-fixed enum type is always 'i', regardless of size.
if (!Enum->isFixed())
return 'i';
// The encoding of a fixed enum type matches its fixed underlying type.
const auto *BT = Enum->getIntegerType()->castAs<BuiltinType>();
return getObjCEncodingForPrimitiveType(C, BT);
}
static void EncodeBitField(const ASTContext *Ctx, std::string& S,
QualType T, const FieldDecl *FD) {
assert(FD->isBitField() && "not a bitfield - getObjCEncodingForTypeImpl");
S += 'b';
// The NeXT runtime encodes bit fields as b followed by the number of bits.
// The GNU runtime requires more information; bitfields are encoded as b,
// then the offset (in bits) of the first element, then the type of the
// bitfield, then the size in bits. For example, in this structure:
//
// struct
// {
// int integer;
// int flags:2;
// };
// On a 32-bit system, the encoding for flags would be b2 for the NeXT
// runtime, but b32i2 for the GNU runtime. The reason for this extra
// information is not especially sensible, but we're stuck with it for
// compatibility with GCC, although providing it breaks anything that
// actually uses runtime introspection and wants to work on both runtimes...
if (Ctx->getLangOpts().ObjCRuntime.isGNUFamily()) {
uint64_t Offset;
if (const auto *IVD = dyn_cast<ObjCIvarDecl>(FD)) {
Offset = Ctx->lookupFieldBitOffset(IVD->getContainingInterface(), nullptr,
IVD);
} else {
const RecordDecl *RD = FD->getParent();
const ASTRecordLayout &RL = Ctx->getASTRecordLayout(RD);
Offset = RL.getFieldOffset(FD->getFieldIndex());
}
S += llvm::utostr(Offset);
if (const auto *ET = T->getAs<EnumType>())
S += ObjCEncodingForEnumType(Ctx, ET);
else {
const auto *BT = T->castAs<BuiltinType>();
S += getObjCEncodingForPrimitiveType(Ctx, BT);
}
}
S += llvm::utostr(FD->getBitWidthValue(*Ctx));
}
// Helper function for determining whether the encoded type string would include
// a template specialization type.
static bool hasTemplateSpecializationInEncodedString(const Type *T,
bool VisitBasesAndFields) {
T = T->getBaseElementTypeUnsafe();
if (auto *PT = T->getAs<PointerType>())
return hasTemplateSpecializationInEncodedString(
PT->getPointeeType().getTypePtr(), false);
auto *CXXRD = T->getAsCXXRecordDecl();
if (!CXXRD)
return false;
if (isa<ClassTemplateSpecializationDecl>(CXXRD))
return true;
if (!CXXRD->hasDefinition() || !VisitBasesAndFields)
return false;
for (auto B : CXXRD->bases())
if (hasTemplateSpecializationInEncodedString(B.getType().getTypePtr(),
true))
return true;
for (auto *FD : CXXRD->fields())
if (hasTemplateSpecializationInEncodedString(FD->getType().getTypePtr(),
true))
return true;
return false;
}
// FIXME: Use SmallString for accumulating string.
void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
const ObjCEncOptions Options,
const FieldDecl *FD,
QualType *NotEncodedT) const {
CanQualType CT = getCanonicalType(T);
switch (CT->getTypeClass()) {
case Type::Builtin:
case Type::Enum:
if (FD && FD->isBitField())
return EncodeBitField(this, S, T, FD);
if (const auto *BT = dyn_cast<BuiltinType>(CT))
S += getObjCEncodingForPrimitiveType(this, BT);
else
S += ObjCEncodingForEnumType(this, cast<EnumType>(CT));
return;
case Type::Complex:
S += 'j';
getObjCEncodingForTypeImpl(T->castAs<ComplexType>()->getElementType(), S,
ObjCEncOptions(),
/*Field=*/nullptr);
return;
case Type::Atomic:
S += 'A';
getObjCEncodingForTypeImpl(T->castAs<AtomicType>()->getValueType(), S,
ObjCEncOptions(),
/*Field=*/nullptr);
return;
// encoding for pointer or reference types.
case Type::Pointer:
case Type::LValueReference:
case Type::RValueReference: {
QualType PointeeTy;
if (isa<PointerType>(CT)) {
const auto *PT = T->castAs<PointerType>();
if (PT->isObjCSelType()) {
S += ':';
return;
}
PointeeTy = PT->getPointeeType();
} else {
PointeeTy = T->castAs<ReferenceType>()->getPointeeType();
}
bool isReadOnly = false;
// For historical/compatibility reasons, the read-only qualifier of the
// pointee gets emitted _before_ the '^'. The read-only qualifier of
// the pointer itself gets ignored, _unless_ we are looking at a typedef!
// Also, do not emit the 'r' for anything but the outermost type!
if (T->getAs<TypedefType>()) {
if (Options.IsOutermostType() && T.isConstQualified()) {
isReadOnly = true;
S += 'r';
}
} else if (Options.IsOutermostType()) {
QualType P = PointeeTy;
while (auto PT = P->getAs<PointerType>())
P = PT->getPointeeType();
if (P.isConstQualified()) {
isReadOnly = true;
S += 'r';
}
}
if (isReadOnly) {
// Another legacy compatibility encoding. Some ObjC qualifier and type
// combinations need to be rearranged.
// Rewrite "in const" from "nr" to "rn"
if (StringRef(S).endswith("nr"))
S.replace(S.end()-2, S.end(), "rn");
}
if (PointeeTy->isCharType()) {
// Char pointer types should be encoded as '*' unless the pointee is a
// type that has been typedef'd to 'BOOL'.
if (!isTypeTypedefedAsBOOL(PointeeTy)) {
S += '*';
return;
}
} else if (const auto *RTy = PointeeTy->getAs<RecordType>()) {
// GCC binary compat: Need to convert "struct objc_class *" to "#".
if (RTy->getDecl()->getIdentifier() == &Idents.get("objc_class")) {
S += '#';
return;
}
// GCC binary compat: Need to convert "struct objc_object *" to "@".
if (RTy->getDecl()->getIdentifier() == &Idents.get("objc_object")) {
S += '@';
return;
}
// If the encoded string for the class includes template names, just emit
// "^v" for pointers to the class.
if (getLangOpts().CPlusPlus &&
(!getLangOpts().EncodeCXXClassTemplateSpec &&
hasTemplateSpecializationInEncodedString(
RTy, Options.ExpandPointedToStructures()))) {
S += "^v";
return;
}
// fall through...
}
S += '^';
getLegacyIntegralTypeEncoding(PointeeTy);
ObjCEncOptions NewOptions;
if (Options.ExpandPointedToStructures())
NewOptions.setExpandStructures();
getObjCEncodingForTypeImpl(PointeeTy, S, NewOptions,
/*Field=*/nullptr, NotEncodedT);
return;
}
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray: {
const auto *AT = cast<ArrayType>(CT);
if (isa<IncompleteArrayType>(AT) && !Options.IsStructField()) {
// Incomplete arrays are encoded as a pointer to the array element.
S += '^';
getObjCEncodingForTypeImpl(
AT->getElementType(), S,
Options.keepingOnly(ObjCEncOptions().setExpandStructures()), FD);
} else {
S += '[';
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
S += llvm::utostr(CAT->getSize().getZExtValue());
else {
// Variable length arrays are encoded as a regular array with 0 elements.
assert((isa<VariableArrayType>(AT) || isa<IncompleteArrayType>(AT)) &&
"Unknown array type!");
S += '0';
}
getObjCEncodingForTypeImpl(
AT->getElementType(), S,
Options.keepingOnly(ObjCEncOptions().setExpandStructures()), FD,
NotEncodedT);
S += ']';
}
return;
}
case Type::FunctionNoProto:
case Type::FunctionProto:
S += '?';
return;
case Type::Record: {
RecordDecl *RDecl = cast<RecordType>(CT)->getDecl();
S += RDecl->isUnion() ? '(' : '{';
// Anonymous structures print as '?'
if (const IdentifierInfo *II = RDecl->getIdentifier()) {
S += II->getName();
if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(RDecl)) {
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
llvm::raw_string_ostream OS(S);
printTemplateArgumentList(OS, TemplateArgs.asArray(),
getPrintingPolicy());
}
} else {
S += '?';
}
if (Options.ExpandStructures()) {
S += '=';
if (!RDecl->isUnion()) {
getObjCEncodingForStructureImpl(RDecl, S, FD, true, NotEncodedT);
} else {
for (const auto *Field : RDecl->fields()) {
if (FD) {
S += '"';
S += Field->getNameAsString();
S += '"';
}
// Special case bit-fields.
if (Field->isBitField()) {
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(),
Field);
} else {
QualType qt = Field->getType();
getLegacyIntegralTypeEncoding(qt);
getObjCEncodingForTypeImpl(
qt, S,
ObjCEncOptions().setExpandStructures().setIsStructField(), FD,
NotEncodedT);
}
}
}
}
S += RDecl->isUnion() ? ')' : '}';
return;
}
case Type::BlockPointer: {
const auto *BT = T->castAs<BlockPointerType>();
S += "@?"; // Unlike a pointer-to-function, which is "^?".
if (Options.EncodeBlockParameters()) {
const auto *FT = BT->getPointeeType()->castAs<FunctionType>();
S += '<';
// Block return type
getObjCEncodingForTypeImpl(FT->getReturnType(), S,
Options.forComponentType(), FD, NotEncodedT);
// Block self
S += "@?";
// Block parameters
if (const auto *FPT = dyn_cast<FunctionProtoType>(FT)) {
for (const auto &I : FPT->param_types())
getObjCEncodingForTypeImpl(I, S, Options.forComponentType(), FD,
NotEncodedT);
}
S += '>';
}
return;
}
case Type::ObjCObject: {
// hack to match legacy encoding of *id and *Class
QualType Ty = getObjCObjectPointerType(CT);
if (Ty->isObjCIdType()) {
S += "{objc_object=}";
return;
}
else if (Ty->isObjCClassType()) {
S += "{objc_class=}";
return;
}
// TODO: Double check to make sure this intentionally falls through.
[[fallthrough]];
}
case Type::ObjCInterface: {
// Ignore protocol qualifiers when mangling at this level.
// @encode(class_name)
ObjCInterfaceDecl *OI = T->castAs<ObjCObjectType>()->getInterface();
S += '{';
S += OI->getObjCRuntimeNameAsString();
if (Options.ExpandStructures()) {
S += '=';
SmallVector<const ObjCIvarDecl*, 32> Ivars;
DeepCollectObjCIvars(OI, true, Ivars);
for (unsigned i = 0, e = Ivars.size(); i != e; ++i) {
const FieldDecl *Field = Ivars[i];
if (Field->isBitField())
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(),
Field);
else
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(), FD,
NotEncodedT);
}
}
S += '}';
return;
}
case Type::ObjCObjectPointer: {
const auto *OPT = T->castAs<ObjCObjectPointerType>();
if (OPT->isObjCIdType()) {
S += '@';
return;
}
if (OPT->isObjCClassType() || OPT->isObjCQualifiedClassType()) {
// FIXME: Consider if we need to output qualifiers for 'Class<p>'.
// Since this is a binary compatibility issue, need to consult with
// runtime folks. Fortunately, this is a *very* obscure construct.
S += '#';
return;
}
if (OPT->isObjCQualifiedIdType()) {
getObjCEncodingForTypeImpl(
getObjCIdType(), S,
Options.keepingOnly(ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()),
FD);
if (FD || Options.EncodingProperty() || Options.EncodeClassNames()) {
// Note that we do extended encoding of protocol qualifier list
// Only when doing ivar or property encoding.
S += '"';
for (const auto *I : OPT->quals()) {
S += '<';
S += I->getObjCRuntimeNameAsString();
S += '>';
}
S += '"';
}
return;
}
S += '@';
if (OPT->getInterfaceDecl() &&
(FD || Options.EncodingProperty() || Options.EncodeClassNames())) {
S += '"';
S += OPT->getInterfaceDecl()->getObjCRuntimeNameAsString();
for (const auto *I : OPT->quals()) {
S += '<';
S += I->getObjCRuntimeNameAsString();
S += '>';
}
S += '"';
}
return;
}
// gcc just blithely ignores member pointers.
// FIXME: we should do better than that. 'M' is available.
case Type::MemberPointer:
// This matches gcc's encoding, even though technically it is insufficient.
// FIXME: We should do a better job than gcc.
case Type::Vector:
case Type::ExtVector:
// Until we have a coherent encoding of these three types, issue warning.
if (NotEncodedT)
*NotEncodedT = T;
return;
case Type::ConstantMatrix:
if (NotEncodedT)
*NotEncodedT = T;
return;
case Type::BitInt:
if (NotEncodedT)
*NotEncodedT = T;
return;
// We could see an undeduced auto type here during error recovery.
// Just ignore it.
case Type::Auto:
case Type::DeducedTemplateSpecialization:
return;
case Type::Pipe:
#define ABSTRACT_TYPE(KIND, BASE)
#define TYPE(KIND, BASE)
#define DEPENDENT_TYPE(KIND, BASE) \
case Type::KIND:
#define NON_CANONICAL_TYPE(KIND, BASE) \
case Type::KIND:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(KIND, BASE) \
case Type::KIND:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("@encode for dependent type!");
}
llvm_unreachable("bad type kind!");
}
void ASTContext::getObjCEncodingForStructureImpl(RecordDecl *RDecl,
std::string &S,
const FieldDecl *FD,
bool includeVBases,
QualType *NotEncodedT) const {
assert(RDecl && "Expected non-null RecordDecl");
assert(!RDecl->isUnion() && "Should not be called for unions");
if (!RDecl->getDefinition() || RDecl->getDefinition()->isInvalidDecl())
return;
const auto *CXXRec = dyn_cast<CXXRecordDecl>(RDecl);
std::multimap<uint64_t, NamedDecl *> FieldOrBaseOffsets;
const ASTRecordLayout &layout = getASTRecordLayout(RDecl);
if (CXXRec) {
for (const auto &BI : CXXRec->bases()) {
if (!BI.isVirtual()) {
CXXRecordDecl *base = BI.getType()->getAsCXXRecordDecl();
if (base->isEmpty())
continue;
uint64_t offs = toBits(layout.getBaseClassOffset(base));
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, base));
}
}
}
unsigned i = 0;
for (FieldDecl *Field : RDecl->fields()) {
if (!Field->isZeroLengthBitField(*this) && Field->isZeroSize(*this))
continue;
uint64_t offs = layout.getFieldOffset(i);
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, Field));
++i;
}
if (CXXRec && includeVBases) {
for (const auto &BI : CXXRec->vbases()) {
CXXRecordDecl *base = BI.getType()->getAsCXXRecordDecl();
if (base->isEmpty())
continue;
uint64_t offs = toBits(layout.getVBaseClassOffset(base));
if (offs >= uint64_t(toBits(layout.getNonVirtualSize())) &&
FieldOrBaseOffsets.find(offs) == FieldOrBaseOffsets.end())
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.end(),
std::make_pair(offs, base));
}
}
CharUnits size;
if (CXXRec) {
size = includeVBases ? layout.getSize() : layout.getNonVirtualSize();
} else {
size = layout.getSize();
}
#ifndef NDEBUG
uint64_t CurOffs = 0;
#endif
std::multimap<uint64_t, NamedDecl *>::iterator
CurLayObj = FieldOrBaseOffsets.begin();
if (CXXRec && CXXRec->isDynamicClass() &&
(CurLayObj == FieldOrBaseOffsets.end() || CurLayObj->first != 0)) {
if (FD) {
S += "\"_vptr$";
std::string recname = CXXRec->getNameAsString();
if (recname.empty()) recname = "?";
S += recname;
S += '"';
}
S += "^^?";
#ifndef NDEBUG
CurOffs += getTypeSize(VoidPtrTy);
#endif
}
if (!RDecl->hasFlexibleArrayMember()) {
// Mark the end of the structure.
uint64_t offs = toBits(size);
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, nullptr));
}
for (; CurLayObj != FieldOrBaseOffsets.end(); ++CurLayObj) {
#ifndef NDEBUG
assert(CurOffs <= CurLayObj->first);
if (CurOffs < CurLayObj->first) {
uint64_t padding = CurLayObj->first - CurOffs;
// FIXME: There doesn't seem to be a way to indicate in the encoding that
// packing/alignment of members is different than normal, in which case
// the encoding will be out-of-sync with the real layout.
// If the runtime switches to just consider the size of types without
// taking into account alignment, we could make padding explicit in the
// encoding (e.g. using arrays of chars). The encoding strings would be
// longer then, though.
CurOffs += padding;
}
#endif
NamedDecl *dcl = CurLayObj->second;
if (!dcl)
break; // reached end of structure.
if (auto *base = dyn_cast<CXXRecordDecl>(dcl)) {
// We expand the bases without their virtual bases since those are going
// in the initial structure. Note that this differs from gcc which
// expands virtual bases each time one is encountered in the hierarchy,
// making the encoding type bigger than it really is.
getObjCEncodingForStructureImpl(base, S, FD, /*includeVBases*/false,
NotEncodedT);
assert(!base->isEmpty());
#ifndef NDEBUG
CurOffs += toBits(getASTRecordLayout(base).getNonVirtualSize());
#endif
} else {
const auto *field = cast<FieldDecl>(dcl);
if (FD) {
S += '"';
S += field->getNameAsString();
S += '"';
}
if (field->isBitField()) {
EncodeBitField(this, S, field->getType(), field);
#ifndef NDEBUG
CurOffs += field->getBitWidthValue(*this);
#endif
} else {
QualType qt = field->getType();
getLegacyIntegralTypeEncoding(qt);
getObjCEncodingForTypeImpl(
qt, S, ObjCEncOptions().setExpandStructures().setIsStructField(),
FD, NotEncodedT);
#ifndef NDEBUG
CurOffs += getTypeSize(field->getType());
#endif
}
}
}
}
void ASTContext::getObjCEncodingForTypeQualifier(Decl::ObjCDeclQualifier QT,
std::string& S) const {
if (QT & Decl::OBJC_TQ_In)
S += 'n';
if (QT & Decl::OBJC_TQ_Inout)
S += 'N';
if (QT & Decl::OBJC_TQ_Out)
S += 'o';
if (QT & Decl::OBJC_TQ_Bycopy)
S += 'O';
if (QT & Decl::OBJC_TQ_Byref)
S += 'R';
if (QT & Decl::OBJC_TQ_Oneway)
S += 'V';
}
TypedefDecl *ASTContext::getObjCIdDecl() const {
if (!ObjCIdDecl) {
QualType T = getObjCObjectType(ObjCBuiltinIdTy, {}, {});
T = getObjCObjectPointerType(T);
ObjCIdDecl = buildImplicitTypedef(T, "id");
}
return ObjCIdDecl;
}
TypedefDecl *ASTContext::getObjCSelDecl() const {
if (!ObjCSelDecl) {
QualType T = getPointerType(ObjCBuiltinSelTy);
ObjCSelDecl = buildImplicitTypedef(T, "SEL");
}
return ObjCSelDecl;
}
TypedefDecl *ASTContext::getObjCClassDecl() const {
if (!ObjCClassDecl) {
QualType T = getObjCObjectType(ObjCBuiltinClassTy, {}, {});
T = getObjCObjectPointerType(T);
ObjCClassDecl = buildImplicitTypedef(T, "Class");
}
return ObjCClassDecl;
}
ObjCInterfaceDecl *ASTContext::getObjCProtocolDecl() const {
if (!ObjCProtocolClassDecl) {
ObjCProtocolClassDecl
= ObjCInterfaceDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
&Idents.get("Protocol"),
/*typeParamList=*/nullptr,
/*PrevDecl=*/nullptr,
SourceLocation(), true);
}
return ObjCProtocolClassDecl;
}
//===----------------------------------------------------------------------===//
// __builtin_va_list Construction Functions
//===----------------------------------------------------------------------===//
static TypedefDecl *CreateCharPtrNamedVaListDecl(const ASTContext *Context,
StringRef Name) {
// typedef char* __builtin[_ms]_va_list;
QualType T = Context->getPointerType(Context->CharTy);
return Context->buildImplicitTypedef(T, Name);
}
static TypedefDecl *CreateMSVaListDecl(const ASTContext *Context) {
return CreateCharPtrNamedVaListDecl(Context, "__builtin_ms_va_list");
}
static TypedefDecl *CreateCharPtrBuiltinVaListDecl(const ASTContext *Context) {
return CreateCharPtrNamedVaListDecl(Context, "__builtin_va_list");
}
static TypedefDecl *CreateVoidPtrBuiltinVaListDecl(const ASTContext *Context) {
// typedef void* __builtin_va_list;
QualType T = Context->getPointerType(Context->VoidTy);
return Context->buildImplicitTypedef(T, "__builtin_va_list");
}
static TypedefDecl *
CreateAArch64ABIBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list
RecordDecl *VaListTagDecl = Context->buildImplicitRecord("__va_list");
if (Context->getLangOpts().CPlusPlus) {
// namespace std { struct __va_list {
auto *NS = NamespaceDecl::Create(
const_cast<ASTContext &>(*Context), Context->getTranslationUnitDecl(),
/*Inline=*/false, SourceLocation(), SourceLocation(),
&Context->Idents.get("std"),
/*PrevDecl=*/nullptr, /*Nested=*/false);
NS->setImplicit();
VaListTagDecl->setDeclContext(NS);
}
VaListTagDecl->startDefinition();
const size_t NumFields = 5;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// void *__stack;
FieldTypes[0] = Context->getPointerType(Context->VoidTy);
FieldNames[0] = "__stack";
// void *__gr_top;
FieldTypes[1] = Context->getPointerType(Context->VoidTy);
FieldNames[1] = "__gr_top";
// void *__vr_top;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__vr_top";
// int __gr_offs;
FieldTypes[3] = Context->IntTy;
FieldNames[3] = "__gr_offs";
// int __vr_offs;
FieldTypes[4] = Context->IntTy;
FieldNames[4] = "__vr_offs";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __builtin_va_list;
return Context->buildImplicitTypedef(VaListTagType, "__builtin_va_list");
}
static TypedefDecl *CreatePowerABIBuiltinVaListDecl(const ASTContext *Context) {
// typedef struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 5;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// unsigned char gpr;
FieldTypes[0] = Context->UnsignedCharTy;
FieldNames[0] = "gpr";
// unsigned char fpr;
FieldTypes[1] = Context->UnsignedCharTy;
FieldNames[1] = "fpr";
// unsigned short reserved;
FieldTypes[2] = Context->UnsignedShortTy;
FieldNames[2] = "reserved";
// void* overflow_arg_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "overflow_arg_area";
// void* reg_save_area;
FieldTypes[4] = Context->getPointerType(Context->VoidTy);
FieldNames[4] = "reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(*Context, VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __va_list_tag;
TypedefDecl *VaListTagTypedefDecl =
Context->buildImplicitTypedef(VaListTagType, "__va_list_tag");
QualType VaListTagTypedefType =
Context->getTypedefType(VaListTagTypedefDecl);
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType
= Context->getConstantArrayType(VaListTagTypedefType,
Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *
CreateX86_64ABIBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 4;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// unsigned gp_offset;
FieldTypes[0] = Context->UnsignedIntTy;
FieldNames[0] = "gp_offset";
// unsigned fp_offset;
FieldTypes[1] = Context->UnsignedIntTy;
FieldNames[1] = "fp_offset";
// void* overflow_arg_area;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "overflow_arg_area";
// void* reg_save_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// };
// typedef struct __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreatePNaClABIBuiltinVaListDecl(const ASTContext *Context) {
// typedef int __builtin_va_list[4];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 4);
QualType IntArrayType = Context->getConstantArrayType(
Context->IntTy, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(IntArrayType, "__builtin_va_list");
}
static TypedefDecl *
CreateAAPCSABIBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list
RecordDecl *VaListDecl = Context->buildImplicitRecord("__va_list");
if (Context->getLangOpts().CPlusPlus) {
// namespace std { struct __va_list {
NamespaceDecl *NS;
NS = NamespaceDecl::Create(const_cast<ASTContext &>(*Context),
Context->getTranslationUnitDecl(),
/*Inline=*/false, SourceLocation(),
SourceLocation(), &Context->Idents.get("std"),
/*PrevDecl=*/nullptr, /*Nested=*/false);
NS->setImplicit();
VaListDecl->setDeclContext(NS);
}
VaListDecl->startDefinition();
// void * __ap;
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get("__ap"),
Context->getPointerType(Context->VoidTy),
/*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListDecl->addDecl(Field);
// };
VaListDecl->completeDefinition();
Context->VaListTagDecl = VaListDecl;
// typedef struct __va_list __builtin_va_list;
QualType T = Context->getRecordType(VaListDecl);
return Context->buildImplicitTypedef(T, "__builtin_va_list");
}
static TypedefDecl *
CreateSystemZBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 4;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// long __gpr;
FieldTypes[0] = Context->LongTy;
FieldNames[0] = "__gpr";
// long __fpr;
FieldTypes[1] = Context->LongTy;
FieldNames[1] = "__fpr";
// void *__overflow_arg_area;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__overflow_arg_area";
// void *__reg_save_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "__reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// };
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreateHexagonBuiltinVaListDecl(const ASTContext *Context) {
// typedef struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 3;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// void *CurrentSavedRegisterArea;
FieldTypes[0] = Context->getPointerType(Context->VoidTy);
FieldNames[0] = "__current_saved_reg_area_pointer";
// void *SavedRegAreaEnd;
FieldTypes[1] = Context->getPointerType(Context->VoidTy);
FieldNames[1] = "__saved_reg_area_end_pointer";
// void *OverflowArea;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__overflow_area_pointer";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(
const_cast<ASTContext &>(*Context), VaListTagDecl, SourceLocation(),
SourceLocation(), &Context->Idents.get(FieldNames[i]), FieldTypes[i],
/*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __va_list_tag;
TypedefDecl *VaListTagTypedefDecl =
Context->buildImplicitTypedef(VaListTagType, "__va_list_tag");
QualType VaListTagTypedefType = Context->getTypedefType(VaListTagTypedefDecl);
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagTypedefType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreateVaListDecl(const ASTContext *Context,
TargetInfo::BuiltinVaListKind Kind) {
switch (Kind) {
case TargetInfo::CharPtrBuiltinVaList:
return CreateCharPtrBuiltinVaListDecl(Context);
case TargetInfo::VoidPtrBuiltinVaList:
return CreateVoidPtrBuiltinVaListDecl(Context);
case TargetInfo::AArch64ABIBuiltinVaList:
return CreateAArch64ABIBuiltinVaListDecl(Context);
case TargetInfo::PowerABIBuiltinVaList:
return CreatePowerABIBuiltinVaListDecl(Context);
case TargetInfo::X86_64ABIBuiltinVaList:
return CreateX86_64ABIBuiltinVaListDecl(Context);
case TargetInfo::PNaClABIBuiltinVaList:
return CreatePNaClABIBuiltinVaListDecl(Context);
case TargetInfo::AAPCSABIBuiltinVaList:
return CreateAAPCSABIBuiltinVaListDecl(Context);
case TargetInfo::SystemZBuiltinVaList:
return CreateSystemZBuiltinVaListDecl(Context);
case TargetInfo::HexagonBuiltinVaList:
return CreateHexagonBuiltinVaListDecl(Context);
}
llvm_unreachable("Unhandled __builtin_va_list type kind");
}
TypedefDecl *ASTContext::getBuiltinVaListDecl() const {
if (!BuiltinVaListDecl) {
BuiltinVaListDecl = CreateVaListDecl(this, Target->getBuiltinVaListKind());
assert(BuiltinVaListDecl->isImplicit());
}
return BuiltinVaListDecl;
}
Decl *ASTContext::getVaListTagDecl() const {
// Force the creation of VaListTagDecl by building the __builtin_va_list
// declaration.
if (!VaListTagDecl)
(void)getBuiltinVaListDecl();
return VaListTagDecl;
}
TypedefDecl *ASTContext::getBuiltinMSVaListDecl() const {
if (!BuiltinMSVaListDecl)
BuiltinMSVaListDecl = CreateMSVaListDecl(this);
return BuiltinMSVaListDecl;
}
bool ASTContext::canBuiltinBeRedeclared(const FunctionDecl *FD) const {
// Allow redecl custom type checking builtin for HLSL.
if (LangOpts.HLSL && FD->getBuiltinID() != Builtin::NotBuiltin &&
BuiltinInfo.hasCustomTypechecking(FD->getBuiltinID()))
return true;
return BuiltinInfo.canBeRedeclared(FD->getBuiltinID());
}
void ASTContext::setObjCConstantStringInterface(ObjCInterfaceDecl *Decl) {
assert(ObjCConstantStringType.isNull() &&
"'NSConstantString' type already set!");
ObjCConstantStringType = getObjCInterfaceType(Decl);
}
/// Retrieve the template name that corresponds to a non-empty
/// lookup.
TemplateName
ASTContext::getOverloadedTemplateName(UnresolvedSetIterator Begin,
UnresolvedSetIterator End) const {
unsigned size = End - Begin;
assert(size > 1 && "set is not overloaded!");
void *memory = Allocate(sizeof(OverloadedTemplateStorage) +
size * sizeof(FunctionTemplateDecl*));
auto *OT = new (memory) OverloadedTemplateStorage(size);
NamedDecl **Storage = OT->getStorage();
for (UnresolvedSetIterator I = Begin; I != End; ++I) {
NamedDecl *D = *I;
assert(isa<FunctionTemplateDecl>(D) ||
isa<UnresolvedUsingValueDecl>(D) ||
(isa<UsingShadowDecl>(D) &&
isa<FunctionTemplateDecl>(D->getUnderlyingDecl())));
*Storage++ = D;
}
return TemplateName(OT);
}
/// Retrieve a template name representing an unqualified-id that has been
/// assumed to name a template for ADL purposes.
TemplateName ASTContext::getAssumedTemplateName(DeclarationName Name) const {
auto *OT = new (*this) AssumedTemplateStorage(Name);
return TemplateName(OT);
}
/// Retrieve the template name that represents a qualified
/// template name such as \c std::vector.
TemplateName ASTContext::getQualifiedTemplateName(NestedNameSpecifier *NNS,
bool TemplateKeyword,
TemplateName Template) const {
assert(NNS && "Missing nested-name-specifier in qualified template name");
// FIXME: Canonicalization?
llvm::FoldingSetNodeID ID;
QualifiedTemplateName::Profile(ID, NNS, TemplateKeyword, Template);
void *InsertPos = nullptr;
QualifiedTemplateName *QTN =
QualifiedTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (!QTN) {
QTN = new (*this, alignof(QualifiedTemplateName))
QualifiedTemplateName(NNS, TemplateKeyword, Template);
QualifiedTemplateNames.InsertNode(QTN, InsertPos);
}
return TemplateName(QTN);
}
/// Retrieve the template name that represents a dependent
/// template name such as \c MetaFun::template apply.
TemplateName
ASTContext::getDependentTemplateName(NestedNameSpecifier *NNS,
const IdentifierInfo *Name) const {
assert((!NNS || NNS->isDependent()) &&
"Nested name specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateName::Profile(ID, NNS, Name);
void *InsertPos = nullptr;
DependentTemplateName *QTN =
DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (QTN)
return TemplateName(QTN);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS == NNS) {
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Name);
} else {
TemplateName Canon = getDependentTemplateName(CanonNNS, Name);
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Name, Canon);
DependentTemplateName *CheckQTN =
DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckQTN && "Dependent type name canonicalization broken");
(void)CheckQTN;
}
DependentTemplateNames.InsertNode(QTN, InsertPos);
return TemplateName(QTN);
}
/// Retrieve the template name that represents a dependent
/// template name such as \c MetaFun::template operator+.
TemplateName
ASTContext::getDependentTemplateName(NestedNameSpecifier *NNS,
OverloadedOperatorKind Operator) const {
assert((!NNS || NNS->isDependent()) &&
"Nested name specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateName::Profile(ID, NNS, Operator);
void *InsertPos = nullptr;
DependentTemplateName *QTN
= DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (QTN)
return TemplateName(QTN);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS == NNS) {
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Operator);
} else {
TemplateName Canon = getDependentTemplateName(CanonNNS, Operator);
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Operator, Canon);
DependentTemplateName *CheckQTN
= DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckQTN && "Dependent template name canonicalization broken");
(void)CheckQTN;
}
DependentTemplateNames.InsertNode(QTN, InsertPos);
return TemplateName(QTN);
}
TemplateName ASTContext::getSubstTemplateTemplateParm(
TemplateName Replacement, Decl *AssociatedDecl, unsigned Index,
std::optional<unsigned> PackIndex) const {
llvm::FoldingSetNodeID ID;
SubstTemplateTemplateParmStorage::Profile(ID, Replacement, AssociatedDecl,
Index, PackIndex);
void *insertPos = nullptr;
SubstTemplateTemplateParmStorage *subst
= SubstTemplateTemplateParms.FindNodeOrInsertPos(ID, insertPos);
if (!subst) {
subst = new (*this) SubstTemplateTemplateParmStorage(
Replacement, AssociatedDecl, Index, PackIndex);
SubstTemplateTemplateParms.InsertNode(subst, insertPos);
}
return TemplateName(subst);
}
TemplateName
ASTContext::getSubstTemplateTemplateParmPack(const TemplateArgument &ArgPack,
Decl *AssociatedDecl,
unsigned Index, bool Final) const {
auto &Self = const_cast<ASTContext &>(*this);
llvm::FoldingSetNodeID ID;
SubstTemplateTemplateParmPackStorage::Profile(ID, Self, ArgPack,
AssociatedDecl, Index, Final);
void *InsertPos = nullptr;
SubstTemplateTemplateParmPackStorage *Subst
= SubstTemplateTemplateParmPacks.FindNodeOrInsertPos(ID, InsertPos);
if (!Subst) {
Subst = new (*this) SubstTemplateTemplateParmPackStorage(
ArgPack.pack_elements(), AssociatedDecl, Index, Final);
SubstTemplateTemplateParmPacks.InsertNode(Subst, InsertPos);
}
return TemplateName(Subst);
}
/// getFromTargetType - Given one of the integer types provided by
/// TargetInfo, produce the corresponding type. The unsigned @p Type
/// is actually a value of type @c TargetInfo::IntType.
CanQualType ASTContext::getFromTargetType(unsigned Type) const {
switch (Type) {
case TargetInfo::NoInt: return {};
case TargetInfo::SignedChar: return SignedCharTy;
case TargetInfo::UnsignedChar: return UnsignedCharTy;
case TargetInfo::SignedShort: return ShortTy;
case TargetInfo::UnsignedShort: return UnsignedShortTy;
case TargetInfo::SignedInt: return IntTy;
case TargetInfo::UnsignedInt: return UnsignedIntTy;
case TargetInfo::SignedLong: return LongTy;
case TargetInfo::UnsignedLong: return UnsignedLongTy;
case TargetInfo::SignedLongLong: return LongLongTy;
case TargetInfo::UnsignedLongLong: return UnsignedLongLongTy;
}
llvm_unreachable("Unhandled TargetInfo::IntType value");
}
//===----------------------------------------------------------------------===//
// Type Predicates.
//===----------------------------------------------------------------------===//
/// getObjCGCAttrKind - Returns one of GCNone, Weak or Strong for the given
/// type's Objective-C garbage collection attribute.
///
Qualifiers::GC ASTContext::getObjCGCAttrKind(QualType Ty) const {
if (getLangOpts().getGC() == LangOptions::NonGC)
return Qualifiers::GCNone;
assert(getLangOpts().ObjC);
Qualifiers::GC GCAttrs = Ty.getObjCGCAttr();
// Default behaviour under Objective-C's GC is for ObjC pointers
// (or pointers to them) to be treated as though they were declared
// as __strong.
if (GCAttrs == Qualifiers::GCNone) {
if (Ty->isObjCObjectPointerType() || Ty->isBlockPointerType())
return Qualifiers::Strong;
else if (Ty->isPointerType())
return getObjCGCAttrKind(Ty->castAs<PointerType>()->getPointeeType());
} else {
// It's not valid to set GC attributes on anything that isn't a
// pointer.
#ifndef NDEBUG
QualType CT = Ty->getCanonicalTypeInternal();
while (const auto *AT = dyn_cast<ArrayType>(CT))
CT = AT->getElementType();
assert(CT->isAnyPointerType() || CT->isBlockPointerType());
#endif
}
return GCAttrs;
}
//===----------------------------------------------------------------------===//
// Type Compatibility Testing
//===----------------------------------------------------------------------===//
/// areCompatVectorTypes - Return true if the two specified vector types are
/// compatible.
static bool areCompatVectorTypes(const VectorType *LHS,
const VectorType *RHS) {
assert(LHS->isCanonicalUnqualified() && RHS->isCanonicalUnqualified());
return LHS->getElementType() == RHS->getElementType() &&
LHS->getNumElements() == RHS->getNumElements();
}
/// areCompatMatrixTypes - Return true if the two specified matrix types are
/// compatible.
static bool areCompatMatrixTypes(const ConstantMatrixType *LHS,
const ConstantMatrixType *RHS) {
assert(LHS->isCanonicalUnqualified() && RHS->isCanonicalUnqualified());
return LHS->getElementType() == RHS->getElementType() &&
LHS->getNumRows() == RHS->getNumRows() &&
LHS->getNumColumns() == RHS->getNumColumns();
}
bool ASTContext::areCompatibleVectorTypes(QualType FirstVec,
QualType SecondVec) {
assert(FirstVec->isVectorType() && "FirstVec should be a vector type");
assert(SecondVec->isVectorType() && "SecondVec should be a vector type");
if (hasSameUnqualifiedType(FirstVec, SecondVec))
return true;
// Treat Neon vector types and most AltiVec vector types as if they are the
// equivalent GCC vector types.
const auto *First = FirstVec->castAs<VectorType>();
const auto *Second = SecondVec->castAs<VectorType>();
if (First->getNumElements() == Second->getNumElements() &&
hasSameType(First->getElementType(), Second->getElementType()) &&
First->getVectorKind() != VectorType::AltiVecPixel &&
First->getVectorKind() != VectorType::AltiVecBool &&
Second->getVectorKind() != VectorType::AltiVecPixel &&
Second->getVectorKind() != VectorType::AltiVecBool &&
First->getVectorKind() != VectorType::SveFixedLengthDataVector &&
First->getVectorKind() != VectorType::SveFixedLengthPredicateVector &&
Second->getVectorKind() != VectorType::SveFixedLengthDataVector &&
Second->getVectorKind() != VectorType::SveFixedLengthPredicateVector)
return true;
return false;
}
/// getSVETypeSize - Return SVE vector or predicate register size.
static uint64_t getSVETypeSize(ASTContext &Context, const BuiltinType *Ty) {
assert(Ty->isVLSTBuiltinType() && "Invalid SVE Type");
return Ty->getKind() == BuiltinType::SveBool
? (Context.getLangOpts().VScaleMin * 128) / Context.getCharWidth()
: Context.getLangOpts().VScaleMin * 128;
}
bool ASTContext::areCompatibleSveTypes(QualType FirstType,
QualType SecondType) {
assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
(FirstType->isVectorType() && SecondType->isSizelessBuiltinType())) &&
"Expected SVE builtin type and vector type!");
auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
if (const auto *BT = FirstType->getAs<BuiltinType>()) {
if (const auto *VT = SecondType->getAs<VectorType>()) {
// Predicates have the same representation as uint8 so we also have to
// check the kind to make these types incompatible.
if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
return BT->getKind() == BuiltinType::SveBool;
else if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector)
return VT->getElementType().getCanonicalType() ==
FirstType->getSveEltType(*this);
else if (VT->getVectorKind() == VectorType::GenericVector)
return getTypeSize(SecondType) == getSVETypeSize(*this, BT) &&
hasSameType(VT->getElementType(),
getBuiltinVectorTypeInfo(BT).ElementType);
}
}
return false;
};
return IsValidCast(FirstType, SecondType) ||
IsValidCast(SecondType, FirstType);
}
bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
QualType SecondType) {
assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
(FirstType->isVectorType() && SecondType->isSizelessBuiltinType())) &&
"Expected SVE builtin type and vector type!");
auto IsLaxCompatible = [this](QualType FirstType, QualType SecondType) {
const auto *BT = FirstType->getAs<BuiltinType>();
if (!BT)
return false;
const auto *VecTy = SecondType->getAs<VectorType>();
if (VecTy &&
(VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector ||
VecTy->getVectorKind() == VectorType::GenericVector)) {
const LangOptions::LaxVectorConversionKind LVCKind =
getLangOpts().getLaxVectorConversions();
// Cannot convert between SVE predicates and SVE vectors because of
// their different sizes.
if (BT->getKind() == BuiltinType::SveBool &&
VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector)
return false;
// If __ARM_FEATURE_SVE_BITS != N do not allow GNU vector lax conversion.
// "Whenever __ARM_FEATURE_SVE_BITS==N, GNUT implicitly
// converts to VLAT and VLAT implicitly converts to GNUT."
// ACLE Spec Version 00bet6, 3.7.3.2. Behavior common to vectors and
// predicates.
if (VecTy->getVectorKind() == VectorType::GenericVector &&
getTypeSize(SecondType) != getSVETypeSize(*this, BT))
return false;
// If -flax-vector-conversions=all is specified, the types are
// certainly compatible.
if (LVCKind == LangOptions::LaxVectorConversionKind::All)
return true;
// If -flax-vector-conversions=integer is specified, the types are
// compatible if the elements are integer types.
if (LVCKind == LangOptions::LaxVectorConversionKind::Integer)
return VecTy->getElementType().getCanonicalType()->isIntegerType() &&
FirstType->getSveEltType(*this)->isIntegerType();
}
return false;
};
return IsLaxCompatible(FirstType, SecondType) ||
IsLaxCompatible(SecondType, FirstType);
}
bool ASTContext::hasDirectOwnershipQualifier(QualType Ty) const {
while (true) {
// __strong id
if (const AttributedType *Attr = dyn_cast<AttributedType>(Ty)) {
if (Attr->getAttrKind() == attr::ObjCOwnership)
return true;
Ty = Attr->getModifiedType();
// X *__strong (...)
} else if (const ParenType *Paren = dyn_cast<ParenType>(Ty)) {
Ty = Paren->getInnerType();
// We do not want to look through typedefs, typeof(expr),
// typeof(type), or any other way that the type is somehow
// abstracted.
} else {
return false;
}
}
}
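// E.g. (a sketch with hypothetical declarations):
//
//   __strong id x;                 // true: ownership attribute on the type
//   typedef __strong id StrongId;
//   StrongId y;                    // false: qualifier hidden behind a typedef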
//===----------------------------------------------------------------------===//
// ObjCQualifiedIdTypesAreCompatible - Compatibility testing for qualified id's.
//===----------------------------------------------------------------------===//
/// ProtocolCompatibleWithProtocol - return 'true' if 'lProto' is in the
/// inheritance hierarchy of 'rProto'.
bool
ASTContext::ProtocolCompatibleWithProtocol(ObjCProtocolDecl *lProto,
ObjCProtocolDecl *rProto) const {
if (declaresSameEntity(lProto, rProto))
return true;
for (auto *PI : rProto->protocols())
if (ProtocolCompatibleWithProtocol(lProto, PI))
return true;
return false;
}
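// For example (illustrative; P1 and P2 are hypothetical protocols): given
//   @protocol P1 @end
//   @protocol P2 <P1> @end
// P1 is compatible with P2, since P1 appears in P2's inheritance hierarchy,
// but P2 is not compatible with P1.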
/// ObjCQualifiedClassTypesAreCompatible - compare Class<pr,...> and
/// Class<pr1, ...>.
bool ASTContext::ObjCQualifiedClassTypesAreCompatible(
const ObjCObjectPointerType *lhs, const ObjCObjectPointerType *rhs) {
for (auto *lhsProto : lhs->quals()) {
bool match = false;
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto)) {
match = true;
break;
}
}
if (!match)
return false;
}
return true;
}
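// For example (illustrative; hypothetical protocols): assigning
// Class<P1, P2> (rhs) to Class<P1> (lhs) is compatible, since every lhs
// protocol is found on the rhs; the reverse fails unless P2 is inherited
// by one of the rhs protocols.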
/// ObjCQualifiedIdTypesAreCompatible - We know that one of lhs/rhs is an
/// ObjCQualifiedIDType.
bool ASTContext::ObjCQualifiedIdTypesAreCompatible(
const ObjCObjectPointerType *lhs, const ObjCObjectPointerType *rhs,
bool compare) {
// Allow id<P..> and an 'id' in all cases.
if (lhs->isObjCIdType() || rhs->isObjCIdType())
return true;
// Don't allow id<P..> to convert to Class or Class<P..> in either direction.
if (lhs->isObjCClassType() || lhs->isObjCQualifiedClassType() ||
rhs->isObjCClassType() || rhs->isObjCQualifiedClassType())
return false;
if (lhs->isObjCQualifiedIdType()) {
if (rhs->qual_empty()) {
// If the RHS is an unqualified interface pointer "NSString*",
// make sure we check the class hierarchy.
if (ObjCInterfaceDecl *rhsID = rhs->getInterfaceDecl()) {
for (auto *I : lhs->quals()) {
// When comparing an id<P> on the lhs with a static type on the rhs,
// check whether the static class implements all of id's protocols,
// directly or through its superclass and categories.
if (!rhsID->ClassImplementsProtocol(I, true))
return false;
}
}
// If there are no qualifiers and no interface, we have an 'id'.
return true;
}
// Both the right and left sides have qualifiers.
for (auto *lhsProto : lhs->quals()) {
bool match = false;
// When comparing an id<P> on the lhs with a static type on the rhs,
// check whether the static class implements all of id's protocols,
// directly or through its superclass and categories.
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
// If the RHS is a qualified interface pointer "NSString<P>*",
// make sure we check the class hierarchy.
if (ObjCInterfaceDecl *rhsID = rhs->getInterfaceDecl()) {
for (auto *I : lhs->quals()) {
// When comparing an id<P> on the lhs with a static type on the rhs,
// check whether the static class implements all of id's protocols,
// directly or through its superclass and categories.
if (rhsID->ClassImplementsProtocol(I, true)) {
match = true;
break;
}
}
}
if (!match)
return false;
}
return true;
}
assert(rhs->isObjCQualifiedIdType() && "One of the LHS/RHS should be id<x>");
if (lhs->getInterfaceType()) {
// Both the right and left sides have qualifiers.
for (auto *lhsProto : lhs->quals()) {
bool match = false;
// When comparing an id<P> on the rhs with a static type on the lhs,
// check whether the static class implements all of id's protocols,
// directly or through its superclass and categories.
// First, the lhs protocols in the qualifier list must be found, directly
// or indirectly, in rhs's qualifier list, or it is a mismatch.
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
if (!match)
return false;
}
// The static class's protocols, or its superclass or category protocols,
// must be found, directly or indirectly, in rhs's qualifier list, or it is
// a mismatch.
if (ObjCInterfaceDecl *lhsID = lhs->getInterfaceDecl()) {
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> LHSInheritedProtocols;
CollectInheritedProtocols(lhsID, LHSInheritedProtocols);
// This is rather dubious but matches gcc's behavior. If lhs has
// no type qualifier and its class has no static protocol(s), assume
// that it is a mismatch.
if (LHSInheritedProtocols.empty() && lhs->qual_empty())
return false;
for (auto *lhsProto : LHSInheritedProtocols) {
bool match = false;
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
if (!match)
return false;
}
}
return true;
}
return false;
}
/// canAssignObjCInterfaces - Return true if the two interface types are
/// compatible for assignment from RHS to LHS. This handles validation of any
/// protocol qualifiers on the LHS or RHS.
bool ASTContext::canAssignObjCInterfaces(const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT) {
const ObjCObjectType* LHS = LHSOPT->getObjectType();
const ObjCObjectType* RHS = RHSOPT->getObjectType();
// If either type represents the built-in 'id' type, return true.
if (LHS->isObjCUnqualifiedId() || RHS->isObjCUnqualifiedId())
return true;
// Function object that propagates a successful result or handles
// __kindof types.
auto finish = [&](bool succeeded) -> bool {
if (succeeded)
return true;
if (!RHS->isKindOfType())
return false;
// Strip off __kindof and protocol qualifiers, then check whether
// we can assign the other way.
return canAssignObjCInterfaces(RHSOPT->stripObjCKindOfTypeAndQuals(*this),
LHSOPT->stripObjCKindOfTypeAndQuals(*this));
};
// Casts from or to id<P> are allowed when the other side has compatible
// protocols.
if (LHS->isObjCQualifiedId() || RHS->isObjCQualifiedId()) {
return finish(ObjCQualifiedIdTypesAreCompatible(LHSOPT, RHSOPT, false));
}
// Verify protocol compatibility for casts from Class<P1> to Class<P2>.
if (LHS->isObjCQualifiedClass() && RHS->isObjCQualifiedClass()) {
return finish(ObjCQualifiedClassTypesAreCompatible(LHSOPT, RHSOPT));
}
// Casts from Class to Class<Foo>, or vice-versa, are allowed.
if (LHS->isObjCClass() && RHS->isObjCClass()) {
return true;
}
// If we have 2 user-defined types, fall into that path.
if (LHS->getInterface() && RHS->getInterface()) {
return finish(canAssignObjCInterfaces(LHS, RHS));
}
return false;
}
/// canAssignObjCInterfacesInBlockPointer - This routine is specifically
/// written to provide type safety for Objective-C pointers used to
/// pass/return arguments in block literals. When passed as arguments, passing
/// 'A*' where 'id' is expected is not OK. Passing 'Sub *' where 'Super *' is
/// expected is not OK. For the return type, the opposite is not OK.
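///
/// For example (an illustrative sketch; A, Super, and Sub are hypothetical
/// classes, with Sub a subclass of Super):
/// \code
///   void (^b1)(id) = ^(A *a) {};          // not OK: 'A *' where 'id' expected
///   void (^b2)(Super *) = ^(Sub *s) {};   // not OK: 'Sub *' where 'Super *'
///                                         // expected
///   Sub *(^b3)(void) = ^Super *(void) {   // not OK: 'Super *' returned where
///     return nil;                         // 'Sub *' expected
///   };
/// \endcode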
bool ASTContext::canAssignObjCInterfacesInBlockPointer(
const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT,
bool BlockReturnType) {
// Function object that propagates a successful result or handles
// __kindof types.
auto finish = [&](bool succeeded) -> bool {
if (succeeded)
return true;
const ObjCObjectPointerType *Expected = BlockReturnType ? RHSOPT : LHSOPT;
if (!Expected->isKindOfType())
return false;
// Strip off __kindof and protocol qualifiers, then check whether
// we can assign the other way.
return canAssignObjCInterfacesInBlockPointer(
RHSOPT->stripObjCKindOfTypeAndQuals(*this),
LHSOPT->stripObjCKindOfTypeAndQuals(*this),
BlockReturnType);
};
if (RHSOPT->isObjCBuiltinType() || LHSOPT->isObjCIdType())
return true;
if (LHSOPT->isObjCBuiltinType()) {
return finish(RHSOPT->isObjCBuiltinType() ||
RHSOPT->isObjCQualifiedIdType());
}
if (LHSOPT->isObjCQualifiedIdType() || RHSOPT->isObjCQualifiedIdType()) {
if (getLangOpts().CompatibilityQualifiedIdBlockParamTypeChecking)
// For block parameters, use the previous type checking for compatibility.
return finish(ObjCQualifiedIdTypesAreCompatible(LHSOPT, RHSOPT, false) ||
// Or the corrected type checking, as in non-compat mode.
(!BlockReturnType &&
ObjCQualifiedIdTypesAreCompatible(RHSOPT, LHSOPT, false)));
else
return finish(ObjCQualifiedIdTypesAreCompatible(
(BlockReturnType ? LHSOPT : RHSOPT),
(BlockReturnType ? RHSOPT : LHSOPT), false));
}
const ObjCInterfaceType* LHS = LHSOPT->getInterfaceType();
const ObjCInterfaceType* RHS = RHSOPT->getInterfaceType();
if (LHS && RHS) { // We have 2 user-defined types.
if (LHS != RHS) {
if (LHS->getDecl()->isSuperClassOf(RHS->getDecl()))
return finish(BlockReturnType);
if (RHS->getDecl()->isSuperClassOf(LHS->getDecl()))
return finish(!BlockReturnType);
}
else
return true;
}
return false;
}
/// Comparison routine for Objective-C protocols to be used with
/// llvm::array_pod_sort.
static int compareObjCProtocolsByName(ObjCProtocolDecl * const *lhs,
ObjCProtocolDecl * const *rhs) {
return (*lhs)->getName().compare((*rhs)->getName());
}
/// getIntersectionOfProtocols - This routine finds the intersection of the
/// sets of protocols inherited from two distinct Objective-C pointer objects
/// with the given common base.
/// It is used to build the composite qualifier list of the composite type of
/// the conditional expression involving two Objective-C pointer objects.
static
void getIntersectionOfProtocols(ASTContext &Context,
const ObjCInterfaceDecl *CommonBase,
const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT,
SmallVectorImpl<ObjCProtocolDecl *> &IntersectionSet) {
const ObjCObjectType* LHS = LHSOPT->getObjectType();
const ObjCObjectType* RHS = RHSOPT->getObjectType();
assert(LHS->getInterface() && "LHS must have an interface base");
assert(RHS->getInterface() && "RHS must have an interface base");
// Add all of the protocols for the LHS.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> LHSProtocolSet;
// Start with the protocol qualifiers.
for (auto *proto : LHS->quals()) {
Context.CollectInheritedProtocols(proto, LHSProtocolSet);
}
// Also add the protocols associated with the LHS interface.
Context.CollectInheritedProtocols(LHS->getInterface(), LHSProtocolSet);
// Add all of the protocols for the RHS.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> RHSProtocolSet;
// Start with the protocol qualifiers.
for (auto *proto : RHS->quals()) {
Context.CollectInheritedProtocols(proto, RHSProtocolSet);
}
// Also add the protocols associated with the RHS interface.
Context.CollectInheritedProtocols(RHS->getInterface(), RHSProtocolSet);
// Compute the intersection of the collected protocol sets.
for (auto *proto : LHSProtocolSet) {
if (RHSProtocolSet.count(proto))
IntersectionSet.push_back(proto);
}
// Compute the set of protocols that is implied by either the common type or
// the protocols within the intersection.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> ImpliedProtocols;
Context.CollectInheritedProtocols(CommonBase, ImpliedProtocols);
// Remove any implied protocols from the list of inherited protocols.
if (!ImpliedProtocols.empty()) {
llvm::erase_if(IntersectionSet, [&](ObjCProtocolDecl *proto) -> bool {
return ImpliedProtocols.contains(proto);
});
}
// Sort the remaining protocols by name.
llvm::array_pod_sort(IntersectionSet.begin(), IntersectionSet.end(),
compareObjCProtocolsByName);
}
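// For example (an illustrative sketch; Base and P1..P3 are hypothetical):
// for the conditional expression
//   cond ? (Base<P1, P2> *)a : (Base<P2, P3> *)b
// with common base Base, the intersection set is {P2}, minus any protocols
// Base itself already adopts.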
/// Determine whether the first type is a subtype of the second.
static bool canAssignObjCObjectTypes(ASTContext &ctx, QualType lhs,
QualType rhs) {
// Common case: two object pointers.
const auto *lhsOPT = lhs->getAs<ObjCObjectPointerType>();
const auto *rhsOPT = rhs->getAs<ObjCObjectPointerType>();
if (lhsOPT && rhsOPT)
return ctx.canAssignObjCInterfaces(lhsOPT, rhsOPT);
// Two block pointers.
const auto *lhsBlock = lhs->getAs<BlockPointerType>();
const auto *rhsBlock = rhs->getAs<BlockPointerType>();
if (lhsBlock && rhsBlock)
return ctx.typesAreBlockPointerCompatible(lhs, rhs);
// If either is an unqualified 'id' and the other is a block, it's
// acceptable.
if ((lhsOPT && lhsOPT->isObjCIdType() && rhsBlock) ||
(rhsOPT && rhsOPT->isObjCIdType() && lhsBlock))
return true;
return false;
}
// Check whether the given Objective-C type argument lists are equivalent.
static bool sameObjCTypeArgs(ASTContext &ctx,
const ObjCInterfaceDecl *iface,
ArrayRef<QualType> lhsArgs,
ArrayRef<QualType> rhsArgs,
bool stripKindOf) {
if (lhsArgs.size() != rhsArgs.size())
return false;
ObjCTypeParamList *typeParams = iface->getTypeParamList();
for (unsigned i = 0, n = lhsArgs.size(); i != n; ++i) {
if (ctx.hasSameType(lhsArgs[i], rhsArgs[i]))
continue;
switch (typeParams->begin()[i]->getVariance()) {
case ObjCTypeParamVariance::Invariant:
if (!stripKindOf ||
!ctx.hasSameType(lhsArgs[i].stripObjCKindOfType(ctx),
rhsArgs[i].stripObjCKindOfType(ctx))) {
return false;
}
break;
case ObjCTypeParamVariance::Covariant:
if (!canAssignObjCObjectTypes(ctx, lhsArgs[i], rhsArgs[i]))
return false;
break;
case ObjCTypeParamVariance::Contravariant:
if (!canAssignObjCObjectTypes(ctx, rhsArgs[i], lhsArgs[i]))
return false;
break;
}
}
return true;
}
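// For example (illustrative; Box, Super, and Sub are hypothetical): given
//   @interface Box<__covariant T> : NSObject @end
// Box<Sub *> matches where Box<Super *> is expected because T is covariant;
// an invariant type parameter would require identical type arguments on both
// sides (modulo __kindof when stripKindOf is set).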
QualType ASTContext::areCommonBaseCompatible(
const ObjCObjectPointerType *Lptr,
const ObjCObjectPointerType *Rptr) {
const ObjCObjectType *LHS = Lptr->getObjectType();
const ObjCObjectType *RHS = Rptr->getObjectType();
const ObjCInterfaceDecl* LDecl = LHS->getInterface();
const ObjCInterfaceDecl* RDecl = RHS->getInterface();
if (!LDecl || !RDecl)
return {};
// When either LHS or RHS is a kindof type, we should return a kindof type.
// For example, for common base of kindof(ASub1) and kindof(ASub2), we return
// kindof(A).
bool anyKindOf = LHS->isKindOfType() || RHS->isKindOfType();
// Follow the left-hand side up the class hierarchy until we either hit a
// root or find the RHS. Record the ancestors in case we don't find it.
llvm::SmallDenseMap<const ObjCInterfaceDecl *, const ObjCObjectType *, 4>
LHSAncestors;
while (true) {
// Record this ancestor. We'll need this if the common type isn't in the
// path from the LHS to the root.
LHSAncestors[LHS->getInterface()->getCanonicalDecl()] = LHS;
if (declaresSameEntity(LHS->getInterface(), RDecl)) {
// Get the type arguments.
ArrayRef<QualType> LHSTypeArgs = LHS->getTypeArgsAsWritten();
bool anyChanges = false;
if (LHS->isSpecialized() && RHS->isSpecialized()) {
// Both have type arguments, compare them.
if (!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHS->getTypeArgs(),
/*stripKindOf=*/true))
return {};
} else if (LHS->isSpecialized() != RHS->isSpecialized()) {
// If only one has type arguments, the result will not have type
// arguments.
LHSTypeArgs = {};
anyChanges = true;
}
// Compute the intersection of protocols.
SmallVector<ObjCProtocolDecl *, 8> Protocols;
getIntersectionOfProtocols(*this, LHS->getInterface(), Lptr, Rptr,
Protocols);
if (!Protocols.empty())
anyChanges = true;
// If anything in the LHS will have changed, build a new result type.
// If we need to return a kindof type but LHS is not a kindof type, we
// build a new result type.
if (anyChanges || LHS->isKindOfType() != anyKindOf) {
QualType Result = getObjCInterfaceType(LHS->getInterface());
Result = getObjCObjectType(Result, LHSTypeArgs, Protocols,
anyKindOf || LHS->isKindOfType());
return getObjCObjectPointerType(Result);
}
return getObjCObjectPointerType(QualType(LHS, 0));
}
// Find the superclass.
QualType LHSSuperType = LHS->getSuperClassType();
if (LHSSuperType.isNull())
break;
LHS = LHSSuperType->castAs<ObjCObjectType>();
}
// We didn't find anything by following the LHS to its root; now check
// the RHS against the cached set of ancestors.
while (true) {
auto KnownLHS = LHSAncestors.find(RHS->getInterface()->getCanonicalDecl());
if (KnownLHS != LHSAncestors.end()) {
LHS = KnownLHS->second;
// Get the type arguments.
ArrayRef<QualType> RHSTypeArgs = RHS->getTypeArgsAsWritten();
bool anyChanges = false;
if (LHS->isSpecialized() && RHS->isSpecialized()) {
// Both have type arguments, compare them.
if (!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHS->getTypeArgs(),
/*stripKindOf=*/true))
return {};
} else if (LHS->isSpecialized() != RHS->isSpecialized()) {
// If only one has type arguments, the result will not have type
// arguments.
RHSTypeArgs = {};
anyChanges = true;
}
// Compute the intersection of protocols.
SmallVector<ObjCProtocolDecl *, 8> Protocols;
getIntersectionOfProtocols(*this, RHS->getInterface(), Lptr, Rptr,
Protocols);
if (!Protocols.empty())
anyChanges = true;
// If we need to return a kindof type but RHS is not a kindof type, we
// build a new result type.
if (anyChanges || RHS->isKindOfType() != anyKindOf) {
QualType Result = getObjCInterfaceType(RHS->getInterface());
Result = getObjCObjectType(Result, RHSTypeArgs, Protocols,
anyKindOf || RHS->isKindOfType());
return getObjCObjectPointerType(Result);
}
return getObjCObjectPointerType(QualType(RHS, 0));
}
// Find the superclass of the RHS.
QualType RHSSuperType = RHS->getSuperClassType();
if (RHSSuperType.isNull())
break;
RHS = RHSSuperType->castAs<ObjCObjectType>();
}
return {};
}
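// For example (an illustrative sketch; A, ASub1, and ASub2 are hypothetical):
// for
//   cond ? (ASub1 *)x : (ASub2 *)y
// where ASub1 and ASub2 both derive from A, the composite type is 'A *'
// qualified with the protocols common to both sides; if either operand is
// __kindof, the result becomes '__kindof A *'.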
bool ASTContext::canAssignObjCInterfaces(const ObjCObjectType *LHS,
const ObjCObjectType *RHS) {
assert(LHS->getInterface() && "LHS is not an interface type");
assert(RHS->getInterface() && "RHS is not an interface type");
// Verify that the base decls are compatible: the RHS must be a subclass of
// the LHS.
ObjCInterfaceDecl *LHSInterface = LHS->getInterface();
bool IsSuperClass = LHSInterface->isSuperClassOf(RHS->getInterface());
if (!IsSuperClass)
return false;
// If the LHS has protocol qualifiers, determine whether all of them are
// satisfied by the RHS (i.e., the RHS has a superset of the protocols in the
// LHS).
if (LHS->getNumProtocols() > 0) {
// OK if conversion of LHS to SuperClass results in a narrowing of types;
// i.e., the RHS must implement, directly or via inheritance, every protocol
// in LHS's protocol list. For example, SuperObj<P1> = lhs<P1,P2> is OK,
// but SuperObj<P1,P2,P3> = lhs<P1,P2> is not.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> SuperClassInheritedProtocols;
CollectInheritedProtocols(RHS->getInterface(), SuperClassInheritedProtocols);
// Also, if RHS has explicit qualifiers, include them when comparing with
// LHS's qualifiers.
for (auto *RHSPI : RHS->quals())
CollectInheritedProtocols(RHSPI, SuperClassInheritedProtocols);
// If there are no protocols associated with RHS, it is not a match.
if (SuperClassInheritedProtocols.empty())
return false;
for (const auto *LHSProto : LHS->quals()) {
bool SuperImplementsProtocol = false;
for (auto *SuperClassProto : SuperClassInheritedProtocols)
if (SuperClassProto->lookupProtocolNamed(LHSProto->getIdentifier())) {
SuperImplementsProtocol = true;
break;
}
if (!SuperImplementsProtocol)
return false;
}
}
// If the LHS is specialized, we may need to check type arguments.
if (LHS->isSpecialized()) {
// Follow the superclass chain until we've matched the LHS class in the
// hierarchy. This substitutes type arguments through.
const ObjCObjectType *RHSSuper = RHS;
while (!declaresSameEntity(RHSSuper->getInterface(), LHSInterface))
RHSSuper = RHSSuper->getSuperClassType()->castAs<ObjCObjectType>();
// If the RHS is specialized, compare type arguments.
if (RHSSuper->isSpecialized() &&
!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHSSuper->getTypeArgs(),
/*stripKindOf=*/true)) {
return false;
}
}
return true;
}
bool ASTContext::areComparableObjCPointerTypes(QualType LHS, QualType RHS) {
// Get the "pointed to" types.
const auto *LHSOPT = LHS->getAs<ObjCObjectPointerType>();
const auto *RHSOPT = RHS->getAs<ObjCObjectPointerType>();
if (!LHSOPT || !RHSOPT)
return false;
return canAssignObjCInterfaces(LHSOPT, RHSOPT) ||
canAssignObjCInterfaces(RHSOPT, LHSOPT);
}
bool ASTContext::canBindObjCObjectType(QualType To, QualType From) {
return canAssignObjCInterfaces(
getObjCObjectPointerType(To)->castAs<ObjCObjectPointerType>(),
getObjCObjectPointerType(From)->castAs<ObjCObjectPointerType>());
}
/// typesAreCompatible - C99 6.7.3p9: For two qualified types to be compatible,
/// both shall have the identically qualified version of a compatible type.
/// C99 6.2.7p1: Two types have compatible types if their types are the
/// same. See 6.7.[2,3,5] for additional rules.
bool ASTContext::typesAreCompatible(QualType LHS, QualType RHS,
bool CompareUnqualified) {
if (getLangOpts().CPlusPlus)
return hasSameType(LHS, RHS);
return !mergeTypes(LHS, RHS, false, CompareUnqualified).isNull();
}
bool ASTContext::propertyTypesAreCompatible(QualType LHS, QualType RHS) {
return typesAreCompatible(LHS, RHS);
}
bool ASTContext::typesAreBlockPointerCompatible(QualType LHS, QualType RHS) {
return !mergeTypes(LHS, RHS, true).isNull();
}
/// mergeTransparentUnionType - If T is a transparent union type and a member
/// of T is compatible with SubType, return the merged type; else return
/// QualType().
QualType ASTContext::mergeTransparentUnionType(QualType T, QualType SubType,
bool OfBlockPointer,
bool Unqualified) {
if (const RecordType *UT = T->getAsUnionType()) {
RecordDecl *UD = UT->getDecl();
if (UD->hasAttr<TransparentUnionAttr>()) {
for (const auto *I : UD->fields()) {
QualType ET = I->getType().getUnqualifiedType();
QualType MT = mergeTypes(ET, SubType, OfBlockPointer, Unqualified);
if (!MT.isNull())
return MT;
}
}
}
return {};
}
/// mergeFunctionParameterTypes - Merge two types that appear as function
/// parameter types.
QualType ASTContext::mergeFunctionParameterTypes(QualType lhs, QualType rhs,
bool OfBlockPointer,
bool Unqualified) {
// GNU extension: two types are compatible if they appear as a function
// argument, one of the types is a transparent union type, and the other
// type is compatible with a union member.
QualType lmerge = mergeTransparentUnionType(lhs, rhs, OfBlockPointer,
Unqualified);
if (!lmerge.isNull())
return lmerge;
QualType rmerge = mergeTransparentUnionType(rhs, lhs, OfBlockPointer,
Unqualified);
if (!rmerge.isNull())
return rmerge;
return mergeTypes(lhs, rhs, OfBlockPointer, Unqualified);
}
QualType ASTContext::mergeFunctionTypes(QualType lhs, QualType rhs,
bool OfBlockPointer, bool Unqualified,
bool AllowCXX,
bool IsConditionalOperator) {
const auto *lbase = lhs->castAs<FunctionType>();
const auto *rbase = rhs->castAs<FunctionType>();
const auto *lproto = dyn_cast<FunctionProtoType>(lbase);
const auto *rproto = dyn_cast<FunctionProtoType>(rbase);
bool allLTypes = true;
bool allRTypes = true;
// Check return type
QualType retType;
if (OfBlockPointer) {
QualType RHS = rbase->getReturnType();
QualType LHS = lbase->getReturnType();
bool UnqualifiedResult = Unqualified;
if (!UnqualifiedResult)
UnqualifiedResult = (!RHS.hasQualifiers() && LHS.hasQualifiers());
retType = mergeTypes(LHS, RHS, true, UnqualifiedResult, true);
}
else
retType = mergeTypes(lbase->getReturnType(), rbase->getReturnType(), false,
Unqualified);
if (retType.isNull())
return {};
if (Unqualified)
retType = retType.getUnqualifiedType();
CanQualType LRetType = getCanonicalType(lbase->getReturnType());
CanQualType RRetType = getCanonicalType(rbase->getReturnType());
if (Unqualified) {
LRetType = LRetType.getUnqualifiedType();
RRetType = RRetType.getUnqualifiedType();
}
if (getCanonicalType(retType) != LRetType)
allLTypes = false;
if (getCanonicalType(retType) != RRetType)
allRTypes = false;
// FIXME: double check this
// FIXME: should we error if lbase->getRegParmAttr() != 0 &&
// rbase->getRegParmAttr() != 0 &&
// lbase->getRegParmAttr() != rbase->getRegParmAttr()?
FunctionType::ExtInfo lbaseInfo = lbase->getExtInfo();
FunctionType::ExtInfo rbaseInfo = rbase->getExtInfo();
// Compatible functions must have compatible calling conventions
if (lbaseInfo.getCC() != rbaseInfo.getCC())
return {};
// Regparm is part of the calling convention.
if (lbaseInfo.getHasRegParm() != rbaseInfo.getHasRegParm())
return {};
if (lbaseInfo.getRegParm() != rbaseInfo.getRegParm())
return {};
if (lbaseInfo.getProducesResult() != rbaseInfo.getProducesResult())
return {};
if (lbaseInfo.getNoCallerSavedRegs() != rbaseInfo.getNoCallerSavedRegs())
return {};
if (lbaseInfo.getNoCfCheck() != rbaseInfo.getNoCfCheck())
return {};
// When merging declarations, it's common for supplemental information like
// attributes to only be present in one of the declarations, and we generally
// want type merging to preserve the union of information. So a merged
// function type should be noreturn if it was noreturn in *either* operand
// type.
//
// But for the conditional operator, this is backwards. The result of the
// operator could be either operand, and its type should conservatively
// reflect that. So a function type in a composite type is noreturn only
// if it's noreturn in *both* operand types.
//
// Arguably, noreturn is a kind of subtype, and the conditional operator
// ought to produce the most specific common supertype of its operand types.
// That would differ from this rule in contravariant positions. However,
// neither C nor C++ generally uses this kind of subtype reasoning. Also,
// as a practical matter, it would only affect C code that does abstraction of
// higher-order functions (taking noreturn callbacks!), which is uncommon to
// say the least. So we use the simpler rule.
bool NoReturn = IsConditionalOperator
? lbaseInfo.getNoReturn() && rbaseInfo.getNoReturn()
: lbaseInfo.getNoReturn() || rbaseInfo.getNoReturn();
if (lbaseInfo.getNoReturn() != NoReturn)
allLTypes = false;
if (rbaseInfo.getNoReturn() != NoReturn)
allRTypes = false;
FunctionType::ExtInfo einfo = lbaseInfo.withNoReturn(NoReturn);
if (lproto && rproto) { // two C99 style function prototypes
assert((AllowCXX ||
(!lproto->hasExceptionSpec() && !rproto->hasExceptionSpec())) &&
"C++ shouldn't be here");
// Compatible functions must have the same number of parameters
if (lproto->getNumParams() != rproto->getNumParams())
return {};
// Variadic and non-variadic functions aren't compatible
if (lproto->isVariadic() != rproto->isVariadic())
return {};
if (lproto->getMethodQuals() != rproto->getMethodQuals())
return {};
SmallVector<FunctionProtoType::ExtParameterInfo, 4> newParamInfos;
bool canUseLeft, canUseRight;
if (!mergeExtParameterInfo(lproto, rproto, canUseLeft, canUseRight,
newParamInfos))
return {};
if (!canUseLeft)
allLTypes = false;
if (!canUseRight)
allRTypes = false;
// Check parameter type compatibility
SmallVector<QualType, 10> types;
for (unsigned i = 0, n = lproto->getNumParams(); i < n; i++) {
QualType lParamType = lproto->getParamType(i).getUnqualifiedType();
QualType rParamType = rproto->getParamType(i).getUnqualifiedType();
QualType paramType = mergeFunctionParameterTypes(
lParamType, rParamType, OfBlockPointer, Unqualified);
if (paramType.isNull())
return {};
if (Unqualified)
paramType = paramType.getUnqualifiedType();
types.push_back(paramType);
if (Unqualified) {
lParamType = lParamType.getUnqualifiedType();
rParamType = rParamType.getUnqualifiedType();
}
if (getCanonicalType(paramType) != getCanonicalType(lParamType))
allLTypes = false;
if (getCanonicalType(paramType) != getCanonicalType(rParamType))
allRTypes = false;
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
FunctionProtoType::ExtProtoInfo EPI = lproto->getExtProtoInfo();
EPI.ExtInfo = einfo;
EPI.ExtParameterInfos =
newParamInfos.empty() ? nullptr : newParamInfos.data();
return getFunctionType(retType, types, EPI);
}
if (lproto) allRTypes = false;
if (rproto) allLTypes = false;
const FunctionProtoType *proto = lproto ? lproto : rproto;
if (proto) {
assert((AllowCXX || !proto->hasExceptionSpec()) && "C++ shouldn't be here");
if (proto->isVariadic())
return {};
// Check that the types are compatible with the types that
// would result from default argument promotions (C99 6.7.5.3p15).
// The only types actually affected are promotable integer
// types and floats, which would be passed as a different
// type depending on whether the prototype is visible.
for (unsigned i = 0, n = proto->getNumParams(); i < n; ++i) {
QualType paramTy = proto->getParamType(i);
// Look at the converted type of enum types, since that is the type used
// to pass enum values.
if (const auto *Enum = paramTy->getAs<EnumType>()) {
paramTy = Enum->getDecl()->getIntegerType();
if (paramTy.isNull())
return {};
}
if (isPromotableIntegerType(paramTy) ||
getCanonicalType(paramTy).getUnqualifiedType() == FloatTy)
return {};
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
FunctionProtoType::ExtProtoInfo EPI = proto->getExtProtoInfo();
EPI.ExtInfo = einfo;
return getFunctionType(retType, proto->getParamTypes(), EPI);
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
return getFunctionNoProtoType(retType, einfo);
}
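// For example (an illustrative sketch, C mode): merging the unprototyped
// 'int f()' with 'int f(int)' yields 'int f(int)', while merging 'int f()'
// with 'int f(float)' fails, because 'float' is changed by the default
// argument promotions.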
/// Given that we have an enum type and a non-enum type, try to merge them.
static QualType mergeEnumWithInteger(ASTContext &Context, const EnumType *ET,
QualType other, bool isBlockReturnType) {
// C99 6.7.2.2p4: Each enumerated type shall be compatible with char,
// a signed integer type, or an unsigned integer type.
// Compatibility is based on the underlying type, not the promotion
// type.
QualType underlyingType = ET->getDecl()->getIntegerType();
if (underlyingType.isNull())
return {};
if (Context.hasSameType(underlyingType, other))
return other;
// In block return types, we're more permissive and accept any
// integral type of the same size.
if (isBlockReturnType && other->isIntegerType() &&
Context.getTypeSize(underlyingType) == Context.getTypeSize(other))
return other;
return {};
}
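// For example (an illustrative sketch, C mode): if the underlying type of
//   enum E { A = 1 };
// is 'unsigned int' on the target, then 'enum E' merges with 'unsigned int'
// but not with 'int', even though the promotion type of 'enum E' is 'int'.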
QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, bool OfBlockPointer,
bool Unqualified, bool BlockReturnType,
bool IsConditionalOperator) {
// For C++ we will not reach this code with reference types (see below),
// for OpenMP variant call overloading we might.
//
// C++ [expr]: If an expression initially has the type "reference to T", the
// type is adjusted to "T" prior to any further analysis, the expression
// designates the object or function denoted by the reference, and the
// expression is an lvalue unless the reference is an rvalue reference and
// the expression is a function call (possibly inside parentheses).
auto *LHSRefTy = LHS->getAs<ReferenceType>();
auto *RHSRefTy = RHS->getAs<ReferenceType>();
if (LangOpts.OpenMP && LHSRefTy && RHSRefTy &&
LHS->getTypeClass() == RHS->getTypeClass())
return mergeTypes(LHSRefTy->getPointeeType(), RHSRefTy->getPointeeType(),
OfBlockPointer, Unqualified, BlockReturnType);
if (LHSRefTy || RHSRefTy)
return {};
if (Unqualified) {
LHS = LHS.getUnqualifiedType();
RHS = RHS.getUnqualifiedType();
}
QualType LHSCan = getCanonicalType(LHS),
RHSCan = getCanonicalType(RHS);
// If two types are identical, they are compatible.
if (LHSCan == RHSCan)
return LHS;
// If the qualifiers are different, the types aren't compatible... mostly.
Qualifiers LQuals = LHSCan.getLocalQualifiers();
Qualifiers RQuals = RHSCan.getLocalQualifiers();
if (LQuals != RQuals) {
// If any of these qualifiers are different, we have a type
// mismatch.
if (LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers() ||
LQuals.getAddressSpace() != RQuals.getAddressSpace() ||
LQuals.getObjCLifetime() != RQuals.getObjCLifetime() ||
LQuals.hasUnaligned() != RQuals.hasUnaligned())
return {};
// Exactly one GC qualifier difference is allowed: __strong is
// okay if the other type has no GC qualifier but is an Objective
// C object pointer (i.e. implicitly strong by default). We fix
// this by pretending that the unqualified type was actually
// qualified __strong.
Qualifiers::GC GC_L = LQuals.getObjCGCAttr();
Qualifiers::GC GC_R = RQuals.getObjCGCAttr();
assert((GC_L != GC_R) && "unequal qualifier sets had only equal elements");
if (GC_L == Qualifiers::Weak || GC_R == Qualifiers::Weak)
return {};
if (GC_L == Qualifiers::Strong && RHSCan->isObjCObjectPointerType()) {
return mergeTypes(LHS, getObjCGCQualType(RHS, Qualifiers::Strong));
}
if (GC_R == Qualifiers::Strong && LHSCan->isObjCObjectPointerType()) {
return mergeTypes(getObjCGCQualType(LHS, Qualifiers::Strong), RHS);
}
return {};
}
// Okay, qualifiers are equal.
Type::TypeClass LHSClass = LHSCan->getTypeClass();
Type::TypeClass RHSClass = RHSCan->getTypeClass();
// We want to consider the two function types to be the same for these
// comparisons, just force one to the other.
if (LHSClass == Type::FunctionProto) LHSClass = Type::FunctionNoProto;
if (RHSClass == Type::FunctionProto) RHSClass = Type::FunctionNoProto;
// Same as above for arrays
if (LHSClass == Type::VariableArray || LHSClass == Type::IncompleteArray)
LHSClass = Type::ConstantArray;
if (RHSClass == Type::VariableArray || RHSClass == Type::IncompleteArray)
RHSClass = Type::ConstantArray;
// ObjCInterfaces are just specialized ObjCObjects.
if (LHSClass == Type::ObjCInterface) LHSClass = Type::ObjCObject;
if (RHSClass == Type::ObjCInterface) RHSClass = Type::ObjCObject;
// Canonicalize ExtVector -> Vector.
if (LHSClass == Type::ExtVector) LHSClass = Type::Vector;
if (RHSClass == Type::ExtVector) RHSClass = Type::Vector;
// If the canonical type classes don't match.
if (LHSClass != RHSClass) {
// Note that we only have special rules for turning block enum
// returns into block int returns, not vice-versa.
if (const auto *ETy = LHS->getAs<EnumType>()) {
return mergeEnumWithInteger(*this, ETy, RHS, false);
}
if (const EnumType* ETy = RHS->getAs<EnumType>()) {
return mergeEnumWithInteger(*this, ETy, LHS, BlockReturnType);
}
// Allow a block pointer type to match an 'id' type.
if (OfBlockPointer && !BlockReturnType) {
if (LHS->isObjCIdType() && RHS->isBlockPointerType())
return LHS;
if (RHS->isObjCIdType() && LHS->isBlockPointerType())
return RHS;
}
// Allow __auto_type to match anything; it merges to the type with more
// information.
if (const auto *AT = LHS->getAs<AutoType>()) {
if (!AT->isDeduced() && AT->isGNUAutoType())
return RHS;
}
if (const auto *AT = RHS->getAs<AutoType>()) {
if (!AT->isDeduced() && AT->isGNUAutoType())
return LHS;
}
return {};
}
// The canonical type classes match.
switch (LHSClass) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
case Type::Auto:
case Type::DeducedTemplateSpecialization:
case Type::LValueReference:
case Type::RValueReference:
case Type::MemberPointer:
llvm_unreachable("C++ should never be in mergeTypes");
case Type::ObjCInterface:
case Type::IncompleteArray:
case Type::VariableArray:
case Type::FunctionProto:
case Type::ExtVector:
llvm_unreachable("Types are eliminated above");
case Type::Pointer:
{
// Merge two pointer types, while trying to preserve typedef info
QualType LHSPointee = LHS->castAs<PointerType>()->getPointeeType();
QualType RHSPointee = RHS->castAs<PointerType>()->getPointeeType();
if (Unqualified) {
LHSPointee = LHSPointee.getUnqualifiedType();
RHSPointee = RHSPointee.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSPointee, RHSPointee, false,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSPointee) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSPointee) == getCanonicalType(ResultType))
return RHS;
return getPointerType(ResultType);
}
case Type::BlockPointer:
{
// Merge two block pointer types, while trying to preserve typedef info
QualType LHSPointee = LHS->castAs<BlockPointerType>()->getPointeeType();
QualType RHSPointee = RHS->castAs<BlockPointerType>()->getPointeeType();
if (Unqualified) {
LHSPointee = LHSPointee.getUnqualifiedType();
RHSPointee = RHSPointee.getUnqualifiedType();
}
if (getLangOpts().OpenCL) {
Qualifiers LHSPteeQual = LHSPointee.getQualifiers();
Qualifiers RHSPteeQual = RHSPointee.getQualifiers();
// Blocks can't be an expression in a ternary operator (OpenCL v2.0
// 6.12.5) thus the following check is asymmetric.
if (!LHSPteeQual.isAddressSpaceSupersetOf(RHSPteeQual))
return {};
LHSPteeQual.removeAddressSpace();
RHSPteeQual.removeAddressSpace();
LHSPointee =
QualType(LHSPointee.getTypePtr(), LHSPteeQual.getAsOpaqueValue());
RHSPointee =
QualType(RHSPointee.getTypePtr(), RHSPteeQual.getAsOpaqueValue());
}
QualType ResultType = mergeTypes(LHSPointee, RHSPointee, OfBlockPointer,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSPointee) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSPointee) == getCanonicalType(ResultType))
return RHS;
return getBlockPointerType(ResultType);
}
case Type::Atomic:
{
// Merge two atomic types, while trying to preserve typedef info.
QualType LHSValue = LHS->castAs<AtomicType>()->getValueType();
QualType RHSValue = RHS->castAs<AtomicType>()->getValueType();
if (Unqualified) {
LHSValue = LHSValue.getUnqualifiedType();
RHSValue = RHSValue.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSValue, RHSValue, false,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSValue) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSValue) == getCanonicalType(ResultType))
return RHS;
return getAtomicType(ResultType);
}
case Type::ConstantArray:
{
const ConstantArrayType* LCAT = getAsConstantArrayType(LHS);
const ConstantArrayType* RCAT = getAsConstantArrayType(RHS);
if (LCAT && RCAT && RCAT->getSize() != LCAT->getSize())
return {};
QualType LHSElem = getAsArrayType(LHS)->getElementType();
QualType RHSElem = getAsArrayType(RHS)->getElementType();
if (Unqualified) {
LHSElem = LHSElem.getUnqualifiedType();
RHSElem = RHSElem.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSElem, RHSElem, false, Unqualified);
if (ResultType.isNull())
return {};
const VariableArrayType* LVAT = getAsVariableArrayType(LHS);
const VariableArrayType* RVAT = getAsVariableArrayType(RHS);
// If either side is a variable array, and both are complete, check whether
// the current dimension is definite.
if (LVAT || RVAT) {
auto SizeFetch = [this](const VariableArrayType* VAT,
const ConstantArrayType* CAT)
-> std::pair<bool,llvm::APInt> {
if (VAT) {
std::optional<llvm::APSInt> TheInt;
Expr *E = VAT->getSizeExpr();
if (E && (TheInt = E->getIntegerConstantExpr(*this)))
return std::make_pair(true, *TheInt);
return std::make_pair(false, llvm::APSInt());
}
if (CAT)
return std::make_pair(true, CAT->getSize());
return std::make_pair(false, llvm::APInt());
};
bool HaveLSize, HaveRSize;
llvm::APInt LSize, RSize;
std::tie(HaveLSize, LSize) = SizeFetch(LVAT, LCAT);
std::tie(HaveRSize, RSize) = SizeFetch(RVAT, RCAT);
if (HaveLSize && HaveRSize && !llvm::APInt::isSameValue(LSize, RSize))
return {}; // Definite, but unequal, array dimension
}
if (LCAT && getCanonicalType(LHSElem) == getCanonicalType(ResultType))
return LHS;
if (RCAT && getCanonicalType(RHSElem) == getCanonicalType(ResultType))
return RHS;
if (LCAT)
return getConstantArrayType(ResultType, LCAT->getSize(),
LCAT->getSizeExpr(),
ArrayType::ArraySizeModifier(), 0);
if (RCAT)
return getConstantArrayType(ResultType, RCAT->getSize(),
RCAT->getSizeExpr(),
ArrayType::ArraySizeModifier(), 0);
if (LVAT && getCanonicalType(LHSElem) == getCanonicalType(ResultType))
return LHS;
if (RVAT && getCanonicalType(RHSElem) == getCanonicalType(ResultType))
return RHS;
if (LVAT) {
// FIXME: This isn't correct! But tricky to implement because
// the array's size has to be the size of LHS, but the type
// has to be different.
return LHS;
}
if (RVAT) {
// FIXME: This isn't correct! But tricky to implement because
// the array's size has to be the size of RHS, but the type
// has to be different.
return RHS;
}
if (getCanonicalType(LHSElem) == getCanonicalType(ResultType)) return LHS;
if (getCanonicalType(RHSElem) == getCanonicalType(ResultType)) return RHS;
return getIncompleteArrayType(ResultType,
ArrayType::ArraySizeModifier(), 0);
}
case Type::FunctionNoProto:
return mergeFunctionTypes(LHS, RHS, OfBlockPointer, Unqualified,
/*AllowCXX=*/false, IsConditionalOperator);
case Type::Record:
case Type::Enum:
return {};
case Type::Builtin:
// Only exactly equal builtin types are compatible, which is tested above.
return {};
case Type::Complex:
// Distinct complex types are incompatible.
return {};
case Type::Vector:
// FIXME: The merged type should be an ExtVector!
if (areCompatVectorTypes(LHSCan->castAs<VectorType>(),
RHSCan->castAs<VectorType>()))
return LHS;
return {};
case Type::ConstantMatrix:
if (areCompatMatrixTypes(LHSCan->castAs<ConstantMatrixType>(),
RHSCan->castAs<ConstantMatrixType>()))
return LHS;
return {};
case Type::ObjCObject: {
// Check if the types are assignment compatible.
// FIXME: This should be type compatibility, e.g. whether
// "LHS x; RHS x;" at global scope is legal.
if (canAssignObjCInterfaces(LHS->castAs<ObjCObjectType>(),
RHS->castAs<ObjCObjectType>()))
return LHS;
return {};
}
case Type::ObjCObjectPointer:
if (OfBlockPointer) {
if (canAssignObjCInterfacesInBlockPointer(
LHS->castAs<ObjCObjectPointerType>(),
RHS->castAs<ObjCObjectPointerType>(), BlockReturnType))
return LHS;
return {};
}
if (canAssignObjCInterfaces(LHS->castAs<ObjCObjectPointerType>(),
RHS->castAs<ObjCObjectPointerType>()))
return LHS;
return {};
case Type::Pipe:
assert(LHS != RHS &&
"Equivalent pipe types should have already been handled!");
return {};
case Type::BitInt: {
// Merge two bit-precise int types, while trying to preserve typedef info.
bool LHSUnsigned = LHS->castAs<BitIntType>()->isUnsigned();
bool RHSUnsigned = RHS->castAs<BitIntType>()->isUnsigned();
unsigned LHSBits = LHS->castAs<BitIntType>()->getNumBits();
unsigned RHSBits = RHS->castAs<BitIntType>()->getNumBits();
// As with unsigned vs. int, there is no merged type if the signedness
// doesn't match.
if (LHSUnsigned != RHSUnsigned)
return {};
if (LHSBits != RHSBits)
return {};
return LHS;
}
}
llvm_unreachable("Invalid Type::Class!");
}
bool ASTContext::mergeExtParameterInfo(
const FunctionProtoType *FirstFnType, const FunctionProtoType *SecondFnType,
bool &CanUseFirst, bool &CanUseSecond,
SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &NewParamInfos) {
assert(NewParamInfos.empty() && "param info list not empty");
CanUseFirst = CanUseSecond = true;
bool FirstHasInfo = FirstFnType->hasExtParameterInfos();
bool SecondHasInfo = SecondFnType->hasExtParameterInfos();
// Fast path: if neither type has ext parameter infos, they trivially match.
if (!FirstHasInfo && !SecondHasInfo)
return true;
bool NeedParamInfo = false;
size_t E = FirstHasInfo ? FirstFnType->getExtParameterInfos().size()
: SecondFnType->getExtParameterInfos().size();
for (size_t I = 0; I < E; ++I) {
FunctionProtoType::ExtParameterInfo FirstParam, SecondParam;
if (FirstHasInfo)
FirstParam = FirstFnType->getExtParameterInfo(I);
if (SecondHasInfo)
SecondParam = SecondFnType->getExtParameterInfo(I);
// Cannot merge unless everything except the noescape flag matches.
if (FirstParam.withIsNoEscape(false) != SecondParam.withIsNoEscape(false))
return false;
bool FirstNoEscape = FirstParam.isNoEscape();
bool SecondNoEscape = SecondParam.isNoEscape();
bool IsNoEscape = FirstNoEscape && SecondNoEscape;
NewParamInfos.push_back(FirstParam.withIsNoEscape(IsNoEscape));
if (NewParamInfos.back().getOpaqueValue())
NeedParamInfo = true;
if (FirstNoEscape != IsNoEscape)
CanUseFirst = false;
if (SecondNoEscape != IsNoEscape)
CanUseSecond = false;
}
if (!NeedParamInfo)
NewParamInfos.clear();
return true;
}
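// For example (an illustrative sketch): merging a declaration whose
// parameter is __attribute__((noescape)) with one whose parameter is not
// yields a composite parameter that is not noescape (the flag must be set
// in both), and CanUseFirst/CanUseSecond is cleared for the declaration
// that had the flag.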
void ASTContext::ResetObjCLayout(const ObjCContainerDecl *CD) {
ObjCLayouts[CD] = nullptr;
}
/// mergeObjCGCQualifiers - This routine merges the ObjC GC attributes of the
/// 'LHS' and 'RHS' types and returns the merged version, including for
/// function return types.
QualType ASTContext::mergeObjCGCQualifiers(QualType LHS, QualType RHS) {
QualType LHSCan = getCanonicalType(LHS),
RHSCan = getCanonicalType(RHS);
// If two types are identical, they are compatible.
if (LHSCan == RHSCan)
return LHS;
if (RHSCan->isFunctionType()) {
if (!LHSCan->isFunctionType())
return {};
QualType OldReturnType =
cast<FunctionType>(RHSCan.getTypePtr())->getReturnType();
QualType NewReturnType =
cast<FunctionType>(LHSCan.getTypePtr())->getReturnType();
QualType ResReturnType =
mergeObjCGCQualifiers(NewReturnType, OldReturnType);
if (ResReturnType.isNull())
return {};
if (ResReturnType == NewReturnType || ResReturnType == OldReturnType) {
// id foo(); ... __strong id foo(); or: __strong id foo(); ... id foo();
// In either case, use OldReturnType to build the new function type.
const auto *F = LHS->castAs<FunctionType>();
// Use dyn_cast here: 'F' may be a FunctionNoProtoType, for which cast<>
// would assert rather than test.
if (const auto *FPT = dyn_cast<FunctionProtoType>(F)) {
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.ExtInfo = getFunctionExtInfo(LHS);
QualType ResultType =
getFunctionType(OldReturnType, FPT->getParamTypes(), EPI);
return ResultType;
}
}
return {};
}
// If the qualifiers are different, the types can still be merged.
Qualifiers LQuals = LHSCan.getLocalQualifiers();
Qualifiers RQuals = RHSCan.getLocalQualifiers();
if (LQuals != RQuals) {
// If any of these qualifiers are different, we have a type mismatch.
if (LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers() ||
LQuals.getAddressSpace() != RQuals.getAddressSpace())
return {};
// Exactly one GC qualifier difference is allowed: __strong is
// okay if the other type has no GC qualifier but is an Objective
// C object pointer (i.e. implicitly strong by default). We fix
// this by pretending that the unqualified type was actually
// qualified __strong.
Qualifiers::GC GC_L = LQuals.getObjCGCAttr();
Qualifiers::GC GC_R = RQuals.getObjCGCAttr();
assert((GC_L != GC_R) && "unequal qualifier sets had only equal elements");
if (GC_L == Qualifiers::Weak || GC_R == Qualifiers::Weak)
return {};
if (GC_L == Qualifiers::Strong)
return LHS;
if (GC_R == Qualifiers::Strong)
return RHS;
return {};
}
if (LHSCan->isObjCObjectPointerType() && RHSCan->isObjCObjectPointerType()) {
QualType LHSBaseQT = LHS->castAs<ObjCObjectPointerType>()->getPointeeType();
QualType RHSBaseQT = RHS->castAs<ObjCObjectPointerType>()->getPointeeType();
QualType ResQT = mergeObjCGCQualifiers(LHSBaseQT, RHSBaseQT);
if (ResQT == LHSBaseQT)
return LHS;
if (ResQT == RHSBaseQT)
return RHS;
}
return {};
}
//===----------------------------------------------------------------------===//
// Integer Predicates
//===----------------------------------------------------------------------===//
unsigned ASTContext::getIntWidth(QualType T) const {
if (const auto *ET = T->getAs<EnumType>())
T = ET->getDecl()->getIntegerType();
if (T->isBooleanType())
return 1;
if (const auto *EIT = T->getAs<BitIntType>())
return EIT->getNumBits();
// For builtin types, just use the standard type sizing method
return (unsigned)getTypeSize(T);
}
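// For example (illustrative): the width of '_Bool' is 1, the width of
// '_BitInt(7)' is 7, and the width of an enum is that of the enum's
// underlying integer type.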
QualType ASTContext::getCorrespondingUnsignedType(QualType T) const {
assert((T->hasIntegerRepresentation() || T->isEnumeralType() ||
T->isFixedPointType()) &&
"Unexpected type");
// Turn <4 x signed int> -> <4 x unsigned int>
if (const auto *VTy = T->getAs<VectorType>())
return getVectorType(getCorrespondingUnsignedType(VTy->getElementType()),
VTy->getNumElements(), VTy->getVectorKind());
// For _BitInt, return an unsigned _BitInt with the same width.
if (const auto *EITy = T->getAs<BitIntType>())
return getBitIntType(/*Unsigned=*/true, EITy->getNumBits());
// For enums, get the underlying integer type of the enum, and let the
// general integer type sign-changing code handle it.
if (const auto *ETy = T->getAs<EnumType>())
T = ETy->getDecl()->getIntegerType();
switch (T->castAs<BuiltinType>()->getKind()) {
case BuiltinType::Char_U:
// Plain `char` is mapped to `unsigned char` even if it's already unsigned
case BuiltinType::Char_S:
case BuiltinType::SChar:
case BuiltinType::Char8:
return UnsignedCharTy;
case BuiltinType::Short:
return UnsignedShortTy;
case BuiltinType::Int:
return UnsignedIntTy;
case BuiltinType::Long:
return UnsignedLongTy;
case BuiltinType::LongLong:
return UnsignedLongLongTy;
case BuiltinType::Int128:
return UnsignedInt128Ty;
// wchar_t is special. It is either signed or not, but when it's signed,
// there's no matching "unsigned wchar_t". Therefore we return the unsigned
// version of its underlying type instead.
case BuiltinType::WChar_S:
return getUnsignedWCharType();
case BuiltinType::ShortAccum:
return UnsignedShortAccumTy;
case BuiltinType::Accum:
return UnsignedAccumTy;
case BuiltinType::LongAccum:
return UnsignedLongAccumTy;
case BuiltinType::SatShortAccum:
return SatUnsignedShortAccumTy;
case BuiltinType::SatAccum:
return SatUnsignedAccumTy;
case BuiltinType::SatLongAccum:
return SatUnsignedLongAccumTy;
case BuiltinType::ShortFract:
return UnsignedShortFractTy;
case BuiltinType::Fract:
return UnsignedFractTy;
case BuiltinType::LongFract:
return UnsignedLongFractTy;
case BuiltinType::SatShortFract:
return SatUnsignedShortFractTy;
case BuiltinType::SatFract:
return SatUnsignedFractTy;
case BuiltinType::SatLongFract:
return SatUnsignedLongFractTy;
default:
assert((T->hasUnsignedIntegerRepresentation() ||
T->isUnsignedFixedPointType()) &&
"Unexpected signed integer or fixed point type");
return T;
}
}
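// For example (an illustrative sketch): 'int' maps to 'unsigned int',
// '_BitInt(12)' to 'unsigned _BitInt(12)', a vector of 4 'int' to a vector
// of 4 'unsigned int', and plain 'char' to 'unsigned char' regardless of
// whether 'char' is signed on the target.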
QualType ASTContext::getCorrespondingSignedType(QualType T) const {
assert((T->hasIntegerRepresentation() || T->isEnumeralType() ||
T->isFixedPointType()) &&
"Unexpected type");
// Turn <4 x unsigned int> -> <4 x signed int>
if (const auto *VTy = T->getAs<VectorType>())
return getVectorType(getCorrespondingSignedType(VTy->getElementType()),
VTy->getNumElements(), VTy->getVectorKind());
// For _BitInt, return a signed _BitInt with the same width.
if (const auto *EITy = T->getAs<BitIntType>())
return getBitIntType(/*Unsigned=*/false, EITy->getNumBits());
// For enums, get the underlying integer type of the enum, and let the
// general integer type sign-changing code handle it.
if (const auto *ETy = T->getAs<EnumType>())
T = ETy->getDecl()->getIntegerType();
switch (T->castAs<BuiltinType>()->getKind()) {
case BuiltinType::Char_S:
// Plain `char` is mapped to `signed char` even if it's already signed
case BuiltinType::Char_U:
case BuiltinType::UChar:
case BuiltinType::Char8:
return SignedCharTy;
case BuiltinType::UShort:
return ShortTy;
case BuiltinType::UInt:
return IntTy;
case BuiltinType::ULong:
return LongTy;
case BuiltinType::ULongLong:
return LongLongTy;
case BuiltinType::UInt128:
return Int128Ty;
// wchar_t is special. It is either unsigned or not, but when it's unsigned,
// there's no matching "signed wchar_t". Therefore we return the signed
// version of its underlying type instead.
case BuiltinType::WChar_U:
return getSignedWCharType();
case BuiltinType::UShortAccum:
return ShortAccumTy;
case BuiltinType::UAccum:
return AccumTy;
case BuiltinType::ULongAccum:
return LongAccumTy;
case BuiltinType::SatUShortAccum:
return SatShortAccumTy;
case BuiltinType::SatUAccum:
return SatAccumTy;
case BuiltinType::SatULongAccum:
return SatLongAccumTy;
case BuiltinType::UShortFract:
return ShortFractTy;
case BuiltinType::UFract:
return FractTy;
case BuiltinType::ULongFract:
return LongFractTy;
case BuiltinType::SatUShortFract:
return SatShortFractTy;
case BuiltinType::SatUFract:
return SatFractTy;
case BuiltinType::SatULongFract:
return SatLongFractTy;
default:
assert(
(T->hasSignedIntegerRepresentation() || T->isSignedFixedPointType()) &&
"Unexpected unsigned integer or fixed point type");
return T;
}
}
ASTMutationListener::~ASTMutationListener() = default;
void ASTMutationListener::DeducedReturnType(const FunctionDecl *FD,
QualType ReturnType) {}
//===----------------------------------------------------------------------===//
// Builtin Type Computation
//===----------------------------------------------------------------------===//
/// DecodeTypeFromStr - This decodes one type descriptor from Str, advancing
/// the pointer over the consumed characters. It returns the resultant type.
/// If AllowTypeModifiers is false, then modifiers like '*' are not parsed,
/// just basic types. This allows "v2i*" to be parsed as a pointer to a v2i
/// instead of a vector of "i*".
///
/// RequiresICE is filled in on return to indicate whether the value is required
/// to be an Integer Constant Expression.
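///
/// For example (an illustrative sketch): "i" decodes to 'int', "LLi" to
/// 'long long', "Ui" to 'unsigned int', "v" to 'void', "IUi" to
/// 'unsigned int' with RequiresICE set, and "V4f" to a generic vector of
/// four 'float' elements.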
static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
ASTContext::GetBuiltinTypeError &Error,
bool &RequiresICE,
bool AllowTypeModifiers) {
// Modifiers.
int HowLong = 0;
bool Signed = false, Unsigned = false;
RequiresICE = false;
// Read the prefixed modifiers first.
bool Done = false;
#ifndef NDEBUG
bool IsSpecial = false;
#endif
while (!Done) {
switch (*Str++) {
default: Done = true; --Str; break;
case 'I':
RequiresICE = true;
break;
case 'S':
assert(!Unsigned && "Can't use both 'S' and 'U' modifiers!");
assert(!Signed && "Can't use 'S' modifier multiple times!");
Signed = true;
break;
case 'U':
assert(!Signed && "Can't use both 'S' and 'U' modifiers!");
assert(!Unsigned && "Can't use 'U' modifier multiple times!");
Unsigned = true;
break;
case 'L':
assert(!IsSpecial && "Can't use 'L' with 'W', 'N', 'Z' or 'O' modifiers");
assert(HowLong <= 2 && "Can't have LLLL modifier");
++HowLong;
break;
case 'N':
// 'N' behaves like 'L' for all non-LP64 targets and like 'int' otherwise.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'N' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
if (Context.getTargetInfo().getLongWidth() == 32)
++HowLong;
break;
case 'W':
// This modifier represents the int64 type.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'W' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
switch (Context.getTargetInfo().getInt64Type()) {
default:
llvm_unreachable("Unexpected integer type");
case TargetInfo::SignedLong:
HowLong = 1;
break;
case TargetInfo::SignedLongLong:
HowLong = 2;
break;
}
break;
case 'Z':
// This modifier represents the int32 type.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'Z' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
switch (Context.getTargetInfo().getIntTypeByWidth(32, true)) {
default:
llvm_unreachable("Unexpected integer type");
case TargetInfo::SignedInt:
HowLong = 0;
break;
case TargetInfo::SignedLong:
HowLong = 1;
break;
case TargetInfo::SignedLongLong:
HowLong = 2;
break;
}
break;
case 'O':
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'O' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
if (Context.getLangOpts().OpenCL)
HowLong = 1;
else
HowLong = 2;
break;
}
}
QualType Type;
// Read the base type.
switch (*Str++) {
default: llvm_unreachable("Unknown builtin type letter!");
case 'x':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'x'!");
Type = Context.Float16Ty;
break;
case 'y':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'y'!");
Type = Context.BFloat16Ty;
break;
case 'v':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'v'!");
Type = Context.VoidTy;
break;
case 'h':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'h'!");
Type = Context.HalfTy;
break;
case 'f':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'f'!");
Type = Context.FloatTy;
break;
case 'd':
assert(HowLong < 3 && !Signed && !Unsigned &&
"Bad modifiers used with 'd'!");
if (HowLong == 1)
Type = Context.LongDoubleTy;
else if (HowLong == 2)
Type = Context.Float128Ty;
else
Type = Context.DoubleTy;
break;
case 's':
assert(HowLong == 0 && "Bad modifiers used with 's'!");
if (Unsigned)
Type = Context.UnsignedShortTy;
else
Type = Context.ShortTy;
break;
case 'i':
if (HowLong == 3)
Type = Unsigned ? Context.UnsignedInt128Ty : Context.Int128Ty;
else if (HowLong == 2)
Type = Unsigned ? Context.UnsignedLongLongTy : Context.LongLongTy;
else if (HowLong == 1)
Type = Unsigned ? Context.UnsignedLongTy : Context.LongTy;
else
Type = Unsigned ? Context.UnsignedIntTy : Context.IntTy;
break;
case 'c':
assert(HowLong == 0 && "Bad modifiers used with 'c'!");
if (Signed)
Type = Context.SignedCharTy;
else if (Unsigned)
Type = Context.UnsignedCharTy;
else
Type = Context.CharTy;
break;
case 'b': // boolean
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'b'!");
Type = Context.BoolTy;
break;
case 'z': // size_t.
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'z'!");
Type = Context.getSizeType();
break;
case 'w': // wchar_t.
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'w'!");
Type = Context.getWideCharType();
break;
case 'F':
Type = Context.getCFConstantStringType();
break;
case 'G':
Type = Context.getObjCIdType();
break;
case 'H':
Type = Context.getObjCSelType();
break;
case 'M':
Type = Context.getObjCSuperType();
break;
case 'a':
Type = Context.getBuiltinVaListType();
assert(!Type.isNull() && "builtin va list type not initialized!");
break;
case 'A':
// This is a "reference" to a va_list; however, what exactly
// this means depends on how va_list is defined. There are two
// different kinds of va_list: ones passed by value, and ones
// passed by reference. An example of a by-value va_list is
// x86, where va_list is a char*. An example of by-ref va_list
// is x86-64, where va_list is a __va_list_tag[1]. For x86,
// we want this argument to be a char*&; for x86-64, we want
// it to be a __va_list_tag*.
Type = Context.getBuiltinVaListType();
assert(!Type.isNull() && "builtin va list type not initialized!");
if (Type->isArrayType())
Type = Context.getArrayDecayedType(Type);
else
Type = Context.getLValueReferenceType(Type);
break;
case 'q': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error,
RequiresICE, false);
assert(!RequiresICE && "Can't require vector ICE");
Type = Context.getScalableVectorType(ElementType, NumElements);
break;
}
case 'V': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error,
RequiresICE, false);
assert(!RequiresICE && "Can't require vector ICE");
// TODO: No way to make AltiVec vectors in builtins yet.
Type = Context.getVectorType(ElementType, NumElements,
VectorType::GenericVector);
break;
}
case 'E': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE,
false);
Type = Context.getExtVectorType(ElementType, NumElements);
break;
}
case 'X': {
QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE,
false);
assert(!RequiresICE && "Can't require complex ICE");
Type = Context.getComplexType(ElementType);
break;
}
case 'Y':
Type = Context.getPointerDiffType();
break;
case 'P':
Type = Context.getFILEType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_stdio;
return {};
}
break;
case 'J':
if (Signed)
Type = Context.getsigjmp_bufType();
else
Type = Context.getjmp_bufType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_setjmp;
return {};
}
break;
case 'K':
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'K'!");
Type = Context.getucontext_tType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_ucontext;
return {};
}
break;
case 'p':
Type = Context.getProcessIDType();
break;
}
// If there are modifiers and if we're allowed to parse them, go for it.
Done = !AllowTypeModifiers;
while (!Done) {
switch (char c = *Str++) {
default: Done = true; --Str; break;
case '*':
case '&': {
// Both pointers and references can have their pointee types
// qualified with an address space.
char *End;
unsigned AddrSpace = strtoul(Str, &End, 10);
if (End != Str) {
// Note AddrSpace == 0 is not the same as an unspecified address space.
Type = Context.getAddrSpaceQualType(
Type,
Context.getLangASForBuiltinAddressSpace(AddrSpace));
Str = End;
}
if (c == '*')
Type = Context.getPointerType(Type);
else
Type = Context.getLValueReferenceType(Type);
break;
}
// FIXME: There's no way to have a built-in with an rvalue ref arg.
case 'C':
Type = Type.withConst();
break;
case 'D':
Type = Context.getVolatileType(Type);
break;
case 'R':
Type = Type.withRestrict();
break;
}
}
assert((!RequiresICE || Type->isIntegralOrEnumerationType()) &&
"Integer constant 'I' type must be an integer");
return Type;
}
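// Worked example (illustrative, using hypothetical descriptors): "V4f" is
// read as 'V' (vector, element count 4) followed by the base letter 'f'
// (float), yielding a generic vector of 4 floats; "iC*" is read as base 'i'
// (int) followed by the trailing modifiers 'C' (add const) and '*' (form a
// pointer), yielding 'const int *'.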
// On some targets such as PowerPC, some of the builtins are defined with custom
// type descriptors for target-dependent types. These descriptors are decoded in
// other functions, but it may be useful to be able to fall back to default
// descriptor decoding to define builtins mixing target-dependent and target-
// independent types. This function allows decoding one type descriptor with
// default decoding.
QualType ASTContext::DecodeTypeStr(const char *&Str, const ASTContext &Context,
GetBuiltinTypeError &Error, bool &RequireICE,
bool AllowTypeModifiers) const {
return DecodeTypeFromStr(Str, Context, Error, RequireICE, AllowTypeModifiers);
}
/// GetBuiltinType - Return the type for the specified builtin.
QualType ASTContext::GetBuiltinType(unsigned Id,
GetBuiltinTypeError &Error,
unsigned *IntegerConstantArgs) const {
const char *TypeStr = BuiltinInfo.getTypeString(Id);
if (TypeStr[0] == '\0') {
Error = GE_Missing_type;
return {};
}
SmallVector<QualType, 8> ArgTypes;
bool RequiresICE = false;
Error = GE_None;
QualType ResType = DecodeTypeFromStr(TypeStr, *this, Error,
RequiresICE, true);
if (Error != GE_None)
return {};
assert(!RequiresICE && "Result of intrinsic cannot be required to be an ICE");
while (TypeStr[0] && TypeStr[0] != '.') {
QualType Ty = DecodeTypeFromStr(TypeStr, *this, Error, RequiresICE, true);
if (Error != GE_None)
return {};
// If this argument is required to be an IntegerConstantExpression and the
// caller cares, fill in the bitmask we return.
if (RequiresICE && IntegerConstantArgs)
*IntegerConstantArgs |= 1 << ArgTypes.size();
// Do array -> pointer decay. The builtin should use the decayed type.
if (Ty->isArrayType())
Ty = getArrayDecayedType(Ty);
ArgTypes.push_back(Ty);
}
if (Id == Builtin::BI__GetExceptionInfo)
return {};
assert((TypeStr[0] != '.' || TypeStr[1] == 0) &&
"'.' should only occur at end of builtin type list!");
bool Variadic = (TypeStr[0] == '.');
FunctionType::ExtInfo EI(getDefaultCallingConvention(
Variadic, /*IsCXXMethod=*/false, /*IsBuiltin=*/true));
if (BuiltinInfo.isNoReturn(Id)) EI = EI.withNoReturn(true);
// We really shouldn't be making a no-proto type here.
if (ArgTypes.empty() && Variadic && !getLangOpts().requiresStrictPrototypes())
return getFunctionNoProtoType(ResType, EI);
FunctionProtoType::ExtProtoInfo EPI;
EPI.ExtInfo = EI;
EPI.Variadic = Variadic;
if (getLangOpts().CPlusPlus && BuiltinInfo.isNoThrow(Id))
EPI.ExceptionSpec.Type =
getLangOpts().CPlusPlus11 ? EST_BasicNoexcept : EST_DynamicNone;
return getFunctionType(ResType, ArgTypes, EPI);
}
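// Worked example (illustrative, with a hypothetical descriptor): "icC*."
// decodes the return type 'i' (int) first, then one parameter "cC*"
// (pointer to const char); the trailing '.' marks the signature variadic,
// producing 'int (const char *, ...)'.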
static GVALinkage basicGVALinkageForFunction(const ASTContext &Context,
const FunctionDecl *FD) {
if (!FD->isExternallyVisible())
return GVA_Internal;
// Non-user-provided functions get emitted as weak definitions with every
// use, no matter whether they've been explicitly instantiated etc.
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD))
if (!MD->isUserProvided())
return GVA_DiscardableODR;
GVALinkage External;
switch (FD->getTemplateSpecializationKind()) {
case TSK_Undeclared:
case TSK_ExplicitSpecialization:
External = GVA_StrongExternal;
break;
case TSK_ExplicitInstantiationDefinition:
return GVA_StrongODR;
// C++11 [temp.explicit]p10:
// [ Note: The intent is that an inline function that is the subject of
// an explicit instantiation declaration will still be implicitly
// instantiated when used so that the body can be considered for
// inlining, but that no out-of-line copy of the inline function would be
// generated in the translation unit. -- end note ]
case TSK_ExplicitInstantiationDeclaration:
return GVA_AvailableExternally;
case TSK_ImplicitInstantiation:
External = GVA_DiscardableODR;
break;
}
if (!FD->isInlined())
return External;
if ((!Context.getLangOpts().CPlusPlus &&
!Context.getTargetInfo().getCXXABI().isMicrosoft() &&
!FD->hasAttr<DLLExportAttr>()) ||
FD->hasAttr<GNUInlineAttr>()) {
// FIXME: This doesn't match gcc's behavior for dllexport inline functions.
// GNU or C99 inline semantics. Determine whether this symbol should be
// externally visible.
if (FD->isInlineDefinitionExternallyVisible())
return External;
// C99 inline semantics, where the symbol is not externally visible.
return GVA_AvailableExternally;
}
// Functions specified with extern and inline in -fms-compatibility mode
// forcibly get emitted: while the body of the function cannot be later
// replaced (so it is ODR), the function definition cannot be discarded
// (so it is strong), hence GVA_StrongODR below.
if (FD->isMSExternInline())
return GVA_StrongODR;
return GVA_DiscardableODR;
}
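// Illustrative example: in C++, a namespace-scope
// 'inline int f() { return 0; }' is TSK_Undeclared and inlined, and none of
// the branches above fire, so it ends up GVA_DiscardableODR; under C99
// inline semantics the branch above applies instead and, if the definition
// is not externally visible, yields GVA_AvailableExternally.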
static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
const Decl *D, GVALinkage L) {
// See http://msdn.microsoft.com/en-us/library/xa0d9ste.aspx
// dllexport/dllimport on inline functions.
if (D->hasAttr<DLLImportAttr>()) {
if (L == GVA_DiscardableODR || L == GVA_StrongODR)
return GVA_AvailableExternally;
} else if (D->hasAttr<DLLExportAttr>()) {
if (L == GVA_DiscardableODR)
return GVA_StrongODR;
} else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) {
// Device-side functions with __global__ attribute must always be
// visible externally so they can be launched from host.
if (D->hasAttr<CUDAGlobalAttr>() &&
(L == GVA_DiscardableODR || L == GVA_Internal))
return GVA_StrongODR;
// Single source offloading languages like CUDA/HIP need to be able to
// access static device variables from host code of the same compilation
// unit. This is done by externalizing the static variable with a name
// shared between the host and device compilations; the name is the same
// within one compilation unit but differs across compilation units.
if (Context.shouldExternalize(D))
return GVA_StrongExternal;
}
return L;
}
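// Illustrative example: a dllexport'ed inline function whose basic linkage
// is GVA_DiscardableODR is promoted to GVA_StrongODR so a copy is actually
// emitted for the DLL, while a dllimport'ed one is demoted to
// GVA_AvailableExternally and treated as defined elsewhere.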
/// Adjust the GVALinkage for a declaration based on what an external AST source
/// knows about whether there can be other definitions of this declaration.
static GVALinkage
adjustGVALinkageForExternalDefinitionKind(const ASTContext &Ctx, const Decl *D,
GVALinkage L) {
ExternalASTSource *Source = Ctx.getExternalSource();
if (!Source)
return L;
switch (Source->hasExternalDefinitions(D)) {
case ExternalASTSource::EK_Never:
// Other translation units rely on us to provide the definition.
if (L == GVA_DiscardableODR)
return GVA_StrongODR;
break;
case ExternalASTSource::EK_Always:
return GVA_AvailableExternally;
case ExternalASTSource::EK_ReplyHazy:
break;
}
return L;
}
GVALinkage ASTContext::GetGVALinkageForFunction(const FunctionDecl *FD) const {
return adjustGVALinkageForExternalDefinitionKind(*this, FD,
adjustGVALinkageForAttributes(*this, FD,
basicGVALinkageForFunction(*this, FD)));
}
static GVALinkage basicGVALinkageForVariable(const ASTContext &Context,
const VarDecl *VD) {
if (!VD->isExternallyVisible())
return GVA_Internal;
if (VD->isStaticLocal()) {
const DeclContext *LexicalContext = VD->getParentFunctionOrMethod();
while (LexicalContext && !isa<FunctionDecl>(LexicalContext))
LexicalContext = LexicalContext->getLexicalParent();
// ObjC Blocks can create local variables that don't have a FunctionDecl
// LexicalContext.
if (!LexicalContext)
return GVA_DiscardableODR;
// Otherwise, let the static local variable inherit its linkage from the
// nearest enclosing function.
auto StaticLocalLinkage =
Context.GetGVALinkageForFunction(cast<FunctionDecl>(LexicalContext));
// Itanium ABI 5.2.2: "Each COMDAT group [for a static local variable] must
// be emitted in any object with references to the symbol for the object it
// contains, whether inline or out-of-line."
// Similar behavior is observed with MSVC. An alternative ABI could use
// StrongODR/AvailableExternally to match the function, but none are
// known/supported currently.
if (StaticLocalLinkage == GVA_StrongODR ||
StaticLocalLinkage == GVA_AvailableExternally)
return GVA_DiscardableODR;
return StaticLocalLinkage;
}
// MSVC treats in-class initialized static data members as definitions.
// By giving them non-strong linkage, out-of-line definitions won't
// cause link errors.
if (Context.isMSStaticDataMemberInlineDefinition(VD))
return GVA_DiscardableODR;
// Most non-template variables have strong linkage; inline variables are
// linkonce_odr or (occasionally, for compatibility) weak_odr.
GVALinkage StrongLinkage;
switch (Context.getInlineVariableDefinitionKind(VD)) {
case ASTContext::InlineVariableDefinitionKind::None:
StrongLinkage = GVA_StrongExternal;
break;
case ASTContext::InlineVariableDefinitionKind::Weak:
case ASTContext::InlineVariableDefinitionKind::WeakUnknown:
StrongLinkage = GVA_DiscardableODR;
break;
case ASTContext::InlineVariableDefinitionKind::Strong:
StrongLinkage = GVA_StrongODR;
break;
}
switch (VD->getTemplateSpecializationKind()) {
case TSK_Undeclared:
return StrongLinkage;
case TSK_ExplicitSpecialization:
return Context.getTargetInfo().getCXXABI().isMicrosoft() &&
VD->isStaticDataMember()
? GVA_StrongODR
: StrongLinkage;
case TSK_ExplicitInstantiationDefinition:
return GVA_StrongODR;
case TSK_ExplicitInstantiationDeclaration:
return GVA_AvailableExternally;
case TSK_ImplicitInstantiation:
return GVA_DiscardableODR;
}
llvm_unreachable("Invalid Linkage!");
}
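// Illustrative example: a C++17 namespace-scope 'inline int x = 0;' is
// classified Weak (or WeakUnknown) and TSK_Undeclared, so it maps to
// GVA_DiscardableODR, whereas a plain 'int x = 0;' maps to
// GVA_StrongExternal.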
GVALinkage ASTContext::GetGVALinkageForVariable(const VarDecl *VD) {
return adjustGVALinkageForExternalDefinitionKind(*this, VD,
adjustGVALinkageForAttributes(*this, VD,
basicGVALinkageForVariable(*this, VD)));
}
bool ASTContext::DeclMustBeEmitted(const Decl *D) {
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (!VD->isFileVarDecl())
return false;
// Global named register variables (GNU extension) are never emitted.
if (VD->getStorageClass() == SC_Register)
return false;
if (VD->getDescribedVarTemplate() ||
isa<VarTemplatePartialSpecializationDecl>(VD))
return false;
} else if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// We never need to emit an uninstantiated function template.
if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
return false;
} else if (isa<PragmaCommentDecl>(D))
return true;
else if (isa<PragmaDetectMismatchDecl>(D))
return true;
else if (isa<OMPRequiresDecl>(D))
return true;
else if (isa<OMPThreadPrivateDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<OMPAllocateDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<OMPDeclareReductionDecl>(D) || isa<OMPDeclareMapperDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<ImportDecl>(D))
return true;
else
return false;
// If this is a member of a class template, we do not need to emit it.
if (D->getDeclContext()->isDependentContext())
return false;
// Weak references don't produce any output by themselves.
if (D->hasAttr<WeakRefAttr>())
return false;
// Aliases and used decls are required.
if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())
return true;
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// Forward declarations aren't required.
if (!FD->doesThisDeclarationHaveABody())
return FD->doesDeclarationForceExternallyVisibleDefinition();
// Constructors and destructors are required.
if (FD->hasAttr<ConstructorAttr>() || FD->hasAttr<DestructorAttr>())
return true;
// The key function for a class is required. This rule only comes
// into play when inline functions can be key functions, though.
if (getTargetInfo().getCXXABI().canKeyFunctionBeInline()) {
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
const CXXRecordDecl *RD = MD->getParent();
if (MD->isOutOfLine() && RD->isDynamicClass()) {
const CXXMethodDecl *KeyFunc = getCurrentKeyFunction(RD);
if (KeyFunc && KeyFunc->getCanonicalDecl() == MD->getCanonicalDecl())
return true;
}
}
}
GVALinkage Linkage = GetGVALinkageForFunction(FD);
// static, static inline, always_inline, and extern inline functions can
// always be deferred. Normal inline functions can be deferred in C99/C++.
// Implicit template instantiations can also be deferred in C++.
return !isDiscardableGVALinkage(Linkage);
}
const auto *VD = cast<VarDecl>(D);
assert(VD->isFileVarDecl() && "Expected file scoped var");
// If the decl is marked as `declare target to`, it should be emitted for the
// host and for the device.
if (LangOpts.OpenMP &&
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
return true;
if (VD->isThisDeclarationADefinition() == VarDecl::DeclarationOnly &&
!isMSStaticDataMemberInlineDefinition(VD))
return false;
// Variables that can be needed in other TUs are required.
auto Linkage = GetGVALinkageForVariable(VD);
if (!isDiscardableGVALinkage(Linkage))
return true;
// We never need to emit a variable that is available in another TU.
if (Linkage == GVA_AvailableExternally)
return false;
// Variables that have destruction with side-effects are required.
if (VD->needsDestruction(*this))
return true;
// Variables that have initialization with side-effects are required.
if (VD->getInit() && VD->getInit()->HasSideEffects(*this) &&
// We can get a value-dependent initializer during error recovery.
(VD->getInit()->isValueDependent() || !VD->evaluateValue()))
return true;
// Likewise, variables with tuple-like bindings are required if their
// bindings have side-effects.
if (const auto *DD = dyn_cast<DecompositionDecl>(VD))
for (const auto *BD : DD->bindings())
if (const auto *BindingVD = BD->getHoldingVar())
if (DeclMustBeEmitted(BindingVD))
return true;
return false;
}
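// Illustrative example: for a file-scope 'static int counter = rand();',
// the initializer has side effects and cannot be constant-evaluated, so
// the checks above make DeclMustBeEmitted return true even though nothing
// else references the variable yet.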
void ASTContext::forEachMultiversionedFunctionVersion(
const FunctionDecl *FD,
llvm::function_ref<void(FunctionDecl *)> Pred) const {
assert(FD->isMultiVersion() && "Only valid for multiversioned functions");
llvm::SmallDenseSet<const FunctionDecl*, 4> SeenDecls;
FD = FD->getMostRecentDecl();
// FIXME: The order of traversal here matters and depends on the order of
// lookup results, which happens to be (mostly) oldest-to-newest, but we
// shouldn't rely on that.
for (auto *CurDecl :
FD->getDeclContext()->getRedeclContext()->lookup(FD->getDeclName())) {
FunctionDecl *CurFD = CurDecl->getAsFunction()->getMostRecentDecl();
if (CurFD && hasSameType(CurFD->getType(), FD->getType()) &&
!llvm::is_contained(SeenDecls, CurFD)) {
SeenDecls.insert(CurFD);
Pred(CurFD);
}
}
}
CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic,
bool IsCXXMethod,
bool IsBuiltin) const {
// Pass through to the C++ ABI object
if (IsCXXMethod)
return ABI->getDefaultMethodCallConv(IsVariadic);
// Builtins ignore the user-specified default calling convention and keep
// the target's default calling convention.
if (!IsBuiltin) {
switch (LangOpts.getDefaultCallingConv()) {
case LangOptions::DCC_None:
break;
case LangOptions::DCC_CDecl:
return CC_C;
case LangOptions::DCC_FastCall:
if (getTargetInfo().hasFeature("sse2") && !IsVariadic)
return CC_X86FastCall;
break;
case LangOptions::DCC_StdCall:
if (!IsVariadic)
return CC_X86StdCall;
break;
case LangOptions::DCC_VectorCall:
// __vectorcall cannot be applied to variadic functions.
if (!IsVariadic)
return CC_X86VectorCall;
break;
case LangOptions::DCC_RegCall:
// __regcall cannot be applied to variadic functions.
if (!IsVariadic)
return CC_X86RegCall;
break;
}
}
return Target->getDefaultCallingConv();
}
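// Illustrative example: under -fdefault-calling-conv=stdcall
// (LangOptions::DCC_StdCall), a non-variadic free function defaults to
// CC_X86StdCall, a variadic one falls through to the target default, and
// builtins skip the user setting entirely.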
bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const {
// Pass through to the C++ ABI object
return ABI->isNearlyEmpty(RD);
}
VTableContextBase *ASTContext::getVTableContext() {
if (!VTContext.get()) {
auto ABI = Target->getCXXABI();
if (ABI.isMicrosoft())
VTContext.reset(new MicrosoftVTableContext(*this));
else {
auto ComponentLayout = getLangOpts().RelativeCXXABIVTables
? ItaniumVTableContext::Relative
: ItaniumVTableContext::Pointer;
VTContext.reset(new ItaniumVTableContext(*this, ComponentLayout));
}
}
return VTContext.get();
}
MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
if (!T)
T = Target;
switch (T->getCXXABI().getKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::GenericARM:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::iOS:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::WatchOS:
case TargetCXXABI::XL:
return ItaniumMangleContext::create(*this, getDiagnostics());
case TargetCXXABI::Microsoft:
return MicrosoftMangleContext::create(*this, getDiagnostics());
}
llvm_unreachable("Unsupported ABI");
}
MangleContext *ASTContext::createDeviceMangleContext(const TargetInfo &T) {
assert(T.getCXXABI().getKind() != TargetCXXABI::Microsoft &&
"Device mangle context does not support Microsoft mangling.");
switch (T.getCXXABI().getKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::GenericARM:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::iOS:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::WatchOS:
case TargetCXXABI::XL:
return ItaniumMangleContext::create(
*this, getDiagnostics(),
[](ASTContext &, const NamedDecl *ND) -> std::optional<unsigned> {
if (const auto *RD = dyn_cast<CXXRecordDecl>(ND))
return RD->getDeviceLambdaManglingNumber();
return std::nullopt;
},
/*IsAux=*/true);
case TargetCXXABI::Microsoft:
return MicrosoftMangleContext::create(*this, getDiagnostics(),
/*IsAux=*/true);
}
llvm_unreachable("Unsupported ABI");
}
CXXABI::~CXXABI() = default;
size_t ASTContext::getSideTableAllocatedMemory() const {
return ASTRecordLayouts.getMemorySize() +
llvm::capacity_in_bytes(ObjCLayouts) +
llvm::capacity_in_bytes(KeyFunctions) +
llvm::capacity_in_bytes(ObjCImpls) +
llvm::capacity_in_bytes(BlockVarCopyInits) +
llvm::capacity_in_bytes(DeclAttrs) +
llvm::capacity_in_bytes(TemplateOrInstantiation) +
llvm::capacity_in_bytes(InstantiatedFromUsingDecl) +
llvm::capacity_in_bytes(InstantiatedFromUsingShadowDecl) +
llvm::capacity_in_bytes(InstantiatedFromUnnamedFieldDecl) +
llvm::capacity_in_bytes(OverriddenMethods) +
llvm::capacity_in_bytes(Types) +
llvm::capacity_in_bytes(VariableArrayTypes);
}
/// getIntTypeForBitwidth -
/// Returns the integer QualType matching the specified details:
/// bitwidth and signedness.
/// Returns an empty type if there is no appropriate target type.
QualType ASTContext::getIntTypeForBitwidth(unsigned DestWidth,
unsigned Signed) const {
TargetInfo::IntType Ty = getTargetInfo().getIntTypeByWidth(DestWidth, Signed);
CanQualType QualTy = getFromTargetType(Ty);
if (!QualTy && DestWidth == 128)
return Signed ? Int128Ty : UnsignedInt128Ty;
return QualTy;
}
/// getRealTypeForBitwidth -
/// Returns the floating-point QualType matching the specified bitwidth.
/// Returns an empty type if there is no appropriate target type.
QualType ASTContext::getRealTypeForBitwidth(unsigned DestWidth,
FloatModeKind ExplicitType) const {
FloatModeKind Ty =
getTargetInfo().getRealTypeByWidth(DestWidth, ExplicitType);
switch (Ty) {
case FloatModeKind::Half:
return HalfTy;
case FloatModeKind::Float:
return FloatTy;
case FloatModeKind::Double:
return DoubleTy;
case FloatModeKind::LongDouble:
return LongDoubleTy;
case FloatModeKind::Float128:
return Float128Ty;
case FloatModeKind::Ibm128:
return Ibm128Ty;
case FloatModeKind::NoFloat:
return {};
}
llvm_unreachable("Unhandled TargetInfo::RealType value");
}
void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) {
if (Number > 1)
MangleNumbers[ND] = Number;
}
unsigned ASTContext::getManglingNumber(const NamedDecl *ND,
bool ForAuxTarget) const {
auto I = MangleNumbers.find(ND);
unsigned Res = I != MangleNumbers.end() ? I->second : 1;
// CUDA/HIP host compilation encodes the host and device mangling numbers
// as the lower and upper halves of a 32-bit integer.
if (LangOpts.CUDA && !LangOpts.CUDAIsDevice) {
Res = ForAuxTarget ? Res >> 16 : Res & 0xFFFF;
} else {
assert(!ForAuxTarget && "Only CUDA/HIP host compilation supports mangling "
"number for aux target");
}
return Res > 1 ? Res : 1;
}
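// Illustrative example: with a stored value of 0x00030002 in CUDA/HIP host
// compilation, getManglingNumber returns 2 (the low half) for the host and
// 3 (the high half) when ForAuxTarget is set.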
void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {
if (Number > 1)
StaticLocalNumbers[VD] = Number;
}
unsigned ASTContext::getStaticLocalNumber(const VarDecl *VD) const {
auto I = StaticLocalNumbers.find(VD);
return I != StaticLocalNumbers.end() ? I->second : 1;
}
MangleNumberingContext &
ASTContext::getManglingNumberContext(const DeclContext *DC) {
assert(LangOpts.CPlusPlus); // We don't need mangling numbers for plain C.
std::unique_ptr<MangleNumberingContext> &MCtx = MangleNumberingContexts[DC];
if (!MCtx)
MCtx = createMangleNumberingContext();
return *MCtx;
}
MangleNumberingContext &
ASTContext::getManglingNumberContext(NeedExtraManglingDecl_t, const Decl *D) {
assert(LangOpts.CPlusPlus); // We don't need mangling numbers for plain C.
std::unique_ptr<MangleNumberingContext> &MCtx =
ExtraMangleNumberingContexts[D];
if (!MCtx)
MCtx = createMangleNumberingContext();
return *MCtx;
}
std::unique_ptr<MangleNumberingContext>
ASTContext::createMangleNumberingContext() const {
return ABI->createMangleNumberingContext();
}
const CXXConstructorDecl *
ASTContext::getCopyConstructorForExceptionObject(CXXRecordDecl *RD) {
return ABI->getCopyConstructorForExceptionObject(
cast<CXXRecordDecl>(RD->getFirstDecl()));
}
void ASTContext::addCopyConstructorForExceptionObject(CXXRecordDecl *RD,
CXXConstructorDecl *CD) {
return ABI->addCopyConstructorForExceptionObject(
cast<CXXRecordDecl>(RD->getFirstDecl()),
cast<CXXConstructorDecl>(CD->getFirstDecl()));
}
void ASTContext::addTypedefNameForUnnamedTagDecl(TagDecl *TD,
TypedefNameDecl *DD) {
return ABI->addTypedefNameForUnnamedTagDecl(TD, DD);
}
TypedefNameDecl *
ASTContext::getTypedefNameForUnnamedTagDecl(const TagDecl *TD) {
return ABI->getTypedefNameForUnnamedTagDecl(TD);
}
void ASTContext::addDeclaratorForUnnamedTagDecl(TagDecl *TD,
DeclaratorDecl *DD) {
return ABI->addDeclaratorForUnnamedTagDecl(TD, DD);
}
DeclaratorDecl *ASTContext::getDeclaratorForUnnamedTagDecl(const TagDecl *TD) {
return ABI->getDeclaratorForUnnamedTagDecl(TD);
}
void ASTContext::setParameterIndex(const ParmVarDecl *D, unsigned int index) {
ParamIndices[D] = index;
}
unsigned ASTContext::getParameterIndex(const ParmVarDecl *D) const {
ParameterIndexTable::const_iterator I = ParamIndices.find(D);
assert(I != ParamIndices.end() &&
"ParmIndices lacks entry set by ParmVarDecl");
return I->second;
}
QualType ASTContext::getStringLiteralArrayType(QualType EltTy,
unsigned Length) const {
// A C++ string literal has a const-qualified element type (C++ 2.13.4p1).
if (getLangOpts().CPlusPlus || getLangOpts().ConstStrings)
EltTy = EltTy.withConst();
EltTy = adjustStringLiteralBaseType(EltTy);
// Get an array type for the string, according to C99 6.4.5. This includes
// the null terminator character.
return getConstantArrayType(EltTy, llvm::APInt(32, Length + 1), nullptr,
ArrayType::Normal, /*IndexTypeQuals*/ 0);
}
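// Illustrative example: in C++ mode a string literal of length 3, such as
// "abc", gets the type 'const char[4]': the element type is
// const-qualified by the branch above and one slot is added for the
// terminating null character.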
StringLiteral *
ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
StringLiteral *&Result = StringLiteralCache[Key];
if (!Result)
Result = StringLiteral::Create(
*this, Key, StringLiteral::Ordinary,
/*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()),
SourceLocation());
return Result;
}
MSGuidDecl *
ASTContext::getMSGuidDecl(MSGuidDecl::Parts Parts) const {
assert(MSGuidTagDecl && "building MS GUID without MS extensions?");
llvm::FoldingSetNodeID ID;
MSGuidDecl::Profile(ID, Parts);
void *InsertPos;
if (MSGuidDecl *Existing = MSGuidDecls.FindNodeOrInsertPos(ID, InsertPos))
return Existing;
QualType GUIDType = getMSGuidType().withConst();
MSGuidDecl *New = MSGuidDecl::Create(*this, GUIDType, Parts);
MSGuidDecls.InsertNode(New, InsertPos);
return New;
}
UnnamedGlobalConstantDecl *
ASTContext::getUnnamedGlobalConstantDecl(QualType Ty,
const APValue &APVal) const {
llvm::FoldingSetNodeID ID;
UnnamedGlobalConstantDecl::Profile(ID, Ty, APVal);
void *InsertPos;
if (UnnamedGlobalConstantDecl *Existing =
UnnamedGlobalConstantDecls.FindNodeOrInsertPos(ID, InsertPos))
return Existing;
UnnamedGlobalConstantDecl *New =
UnnamedGlobalConstantDecl::Create(*this, Ty, APVal);
UnnamedGlobalConstantDecls.InsertNode(New, InsertPos);
return New;
}
TemplateParamObjectDecl *
ASTContext::getTemplateParamObjectDecl(QualType T, const APValue &V) const {
assert(T->isRecordType() && "template param object of unexpected type");
// C++ [temp.param]p8:
// [...] a static storage duration object of type 'const T' [...]
T.addConst();
llvm::FoldingSetNodeID ID;
TemplateParamObjectDecl::Profile(ID, T, V);
void *InsertPos;
if (TemplateParamObjectDecl *Existing =
TemplateParamObjectDecls.FindNodeOrInsertPos(ID, InsertPos))
return Existing;
TemplateParamObjectDecl *New = TemplateParamObjectDecl::Create(*this, T, V);
TemplateParamObjectDecls.InsertNode(New, InsertPos);
return New;
}
bool ASTContext::AtomicUsesUnsupportedLibcall(const AtomicExpr *E) const {
const llvm::Triple &T = getTargetInfo().getTriple();
if (!T.isOSDarwin())
return false;
if (!(T.isiOS() && T.isOSVersionLT(7)) &&
!(T.isMacOSX() && T.isOSVersionLT(10, 9)))
return false;
QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
CharUnits sizeChars = getTypeSizeInChars(AtomicTy);
uint64_t Size = sizeChars.getQuantity();
CharUnits alignChars = getTypeAlignInChars(AtomicTy);
unsigned Align = alignChars.getQuantity();
unsigned MaxInlineWidthInBits = getTargetInfo().getMaxAtomicInlineWidth();
return (Size != Align || toBits(sizeChars) > MaxInlineWidthInBits);
}
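// Illustrative example: on an affected Darwin target (e.g. macOS earlier
// than 10.9) whose maximum inline atomic width is 64 bits, a 16-byte
// _Atomic value (128 bits) would need a library call, so this returns
// true; a size/alignment mismatch is rejected the same way.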
bool
ASTContext::ObjCMethodsAreEqual(const ObjCMethodDecl *MethodDecl,
const ObjCMethodDecl *MethodImpl) {
// No point trying to match an unavailable/deprecated method.
if (MethodDecl->hasAttr<UnavailableAttr>()
|| MethodDecl->hasAttr<DeprecatedAttr>())
return false;
if (MethodDecl->getObjCDeclQualifier() !=
MethodImpl->getObjCDeclQualifier())
return false;
if (!hasSameType(MethodDecl->getReturnType(), MethodImpl->getReturnType()))
return false;
if (MethodDecl->param_size() != MethodImpl->param_size())
return false;
for (ObjCMethodDecl::param_const_iterator IM = MethodImpl->param_begin(),
IF = MethodDecl->param_begin(), EM = MethodImpl->param_end(),
EF = MethodDecl->param_end();
IM != EM && IF != EF; ++IM, ++IF) {
const ParmVarDecl *DeclVar = (*IF);
const ParmVarDecl *ImplVar = (*IM);
if (ImplVar->getObjCDeclQualifier() != DeclVar->getObjCDeclQualifier())
return false;
if (!hasSameType(DeclVar->getType(), ImplVar->getType()))
return false;
}
return (MethodDecl->isVariadic() == MethodImpl->isVariadic());
}
uint64_t ASTContext::getTargetNullPointerValue(QualType QT) const {
LangAS AS;
if (QT->getUnqualifiedDesugaredType()->isNullPtrType())
AS = LangAS::Default;
else
AS = QT->getPointeeType().getAddressSpace();
return getTargetInfo().getNullPointerValue(AS);
}
unsigned ASTContext::getTargetAddressSpace(LangAS AS) const {
return getTargetInfo().getTargetAddressSpace(AS);
}
bool ASTContext::hasSameExpr(const Expr *X, const Expr *Y) const {
if (X == Y)
return true;
if (!X || !Y)
return false;
llvm::FoldingSetNodeID IDX, IDY;
X->Profile(IDX, *this, /*Canonical=*/true);
Y->Profile(IDY, *this, /*Canonical=*/true);
return IDX == IDY;
}
// The getCommon* helpers return, for two 'same' entities X and Y given as
// inputs, another entity which is also the 'same' as the inputs, but which
// is closer to the canonical form of the inputs, each according to a given
// criterion.
// The getCommon*Checked variants are 'null inputs not allowed' equivalents
// of the regular ones.
static Decl *getCommonDecl(Decl *X, Decl *Y) {
if (!declaresSameEntity(X, Y))
return nullptr;
for (const Decl *DX : X->redecls()) {
// If we reach Y before reaching the first decl, that means X is older.
if (DX == Y)
return X;
// If we reach the first decl, then Y is older.
if (DX->isFirstDecl())
return Y;
}
llvm_unreachable("Corrupt redecls chain");
}
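// Illustrative example: if F is a forward declaration and D a later
// redeclaration of the same function, getCommonDecl(F, D) returns F, the
// older of the two; the loop walks the redeclaration chain just to decide
// which input came first.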
template <class T, std::enable_if_t<std::is_base_of_v<Decl, T>, bool> = true>
static T *getCommonDecl(T *X, T *Y) {
return cast_or_null<T>(
getCommonDecl(const_cast<Decl *>(cast_or_null<Decl>(X)),
const_cast<Decl *>(cast_or_null<Decl>(Y))));
}
template <class T, std::enable_if_t<std::is_base_of_v<Decl, T>, bool> = true>
static T *getCommonDeclChecked(T *X, T *Y) {
return cast<T>(getCommonDecl(const_cast<Decl *>(cast<Decl>(X)),
const_cast<Decl *>(cast<Decl>(Y))));
}
static TemplateName getCommonTemplateName(ASTContext &Ctx, TemplateName X,
TemplateName Y) {
if (X.getAsVoidPointer() == Y.getAsVoidPointer())
return X;
// FIXME: There are cases here where we could find a common template name
// with more sugar. For example one could be a SubstTemplateTemplate*
// replacing the other.
TemplateName CX = Ctx.getCanonicalTemplateName(X);
if (CX.getAsVoidPointer() !=
Ctx.getCanonicalTemplateName(Y).getAsVoidPointer())
return TemplateName();
return CX;
}
static TemplateName
getCommonTemplateNameChecked(ASTContext &Ctx, TemplateName X, TemplateName Y) {
TemplateName R = getCommonTemplateName(Ctx, X, Y);
assert(R.getAsVoidPointer() != nullptr);
return R;
}
static auto getCommonTypes(ASTContext &Ctx, ArrayRef<QualType> Xs,
ArrayRef<QualType> Ys, bool Unqualified = false) {
assert(Xs.size() == Ys.size());
SmallVector<QualType, 8> Rs(Xs.size());
for (size_t I = 0; I < Rs.size(); ++I)
Rs[I] = Ctx.getCommonSugaredType(Xs[I], Ys[I], Unqualified);
return Rs;
}
template <class T>
static SourceLocation getCommonAttrLoc(const T *X, const T *Y) {
return X->getAttributeLoc() == Y->getAttributeLoc() ? X->getAttributeLoc()
: SourceLocation();
}
static TemplateArgument getCommonTemplateArgument(ASTContext &Ctx,
const TemplateArgument &X,
const TemplateArgument &Y) {
if (X.getKind() != Y.getKind())
return TemplateArgument();
switch (X.getKind()) {
case TemplateArgument::ArgKind::Type:
if (!Ctx.hasSameType(X.getAsType(), Y.getAsType()))
return TemplateArgument();
return TemplateArgument(
Ctx.getCommonSugaredType(X.getAsType(), Y.getAsType()));
case TemplateArgument::ArgKind::NullPtr:
if (!Ctx.hasSameType(X.getNullPtrType(), Y.getNullPtrType()))
return TemplateArgument();
return TemplateArgument(
Ctx.getCommonSugaredType(X.getNullPtrType(), Y.getNullPtrType()),
/*Unqualified=*/true);
case TemplateArgument::ArgKind::Expression:
if (!Ctx.hasSameType(X.getAsExpr()->getType(), Y.getAsExpr()->getType()))
return TemplateArgument();
// FIXME: Try to keep the common sugar.
return X;
case TemplateArgument::ArgKind::Template: {
TemplateName TX = X.getAsTemplate(), TY = Y.getAsTemplate();
TemplateName CTN = ::getCommonTemplateName(Ctx, TX, TY);
if (!CTN.getAsVoidPointer())
return TemplateArgument();
return TemplateArgument(CTN);
}
case TemplateArgument::ArgKind::TemplateExpansion: {
TemplateName TX = X.getAsTemplateOrTemplatePattern(),
TY = Y.getAsTemplateOrTemplatePattern();
TemplateName CTN = ::getCommonTemplateName(Ctx, TX, TY);
if (!CTN.getAsVoidPointer())
return TemplateName();
auto NExpX = X.getNumTemplateExpansions();
assert(NExpX == Y.getNumTemplateExpansions());
return TemplateArgument(CTN, NExpX);
}
default:
// FIXME: Handle the other argument kinds.
return X;
}
}
static bool getCommonTemplateArguments(ASTContext &Ctx,
SmallVectorImpl<TemplateArgument> &R,
ArrayRef<TemplateArgument> Xs,
ArrayRef<TemplateArgument> Ys) {
if (Xs.size() != Ys.size())
return true;
R.resize(Xs.size());
for (size_t I = 0; I < R.size(); ++I) {
R[I] = getCommonTemplateArgument(Ctx, Xs[I], Ys[I]);
if (R[I].isNull())
return true;
}
return false;
}
static auto getCommonTemplateArguments(ASTContext &Ctx,
ArrayRef<TemplateArgument> Xs,
ArrayRef<TemplateArgument> Ys) {
SmallVector<TemplateArgument, 8> R;
bool Different = getCommonTemplateArguments(Ctx, R, Xs, Ys);
assert(!Different);
(void)Different;
return R;
}
template <class T>
static ElaboratedTypeKeyword getCommonTypeKeyword(const T *X, const T *Y) {
return X->getKeyword() == Y->getKeyword() ? X->getKeyword()
: ElaboratedTypeKeyword::ETK_None;
}
template <class T>
static NestedNameSpecifier *getCommonNNS(ASTContext &Ctx, const T *X,
const T *Y) {
// FIXME: Try to keep the common NNS sugar.
return X->getQualifier() == Y->getQualifier()
? X->getQualifier()
: Ctx.getCanonicalNestedNameSpecifier(X->getQualifier());
}
template <class T>
static QualType getCommonElementType(ASTContext &Ctx, const T *X, const T *Y) {
return Ctx.getCommonSugaredType(X->getElementType(), Y->getElementType());
}
template <class T>
static QualType getCommonArrayElementType(ASTContext &Ctx, const T *X,
Qualifiers &QX, const T *Y,
Qualifiers &QY) {
QualType EX = X->getElementType(), EY = Y->getElementType();
QualType R = Ctx.getCommonSugaredType(EX, EY,
/*Unqualified=*/true);
Qualifiers RQ = R.getQualifiers();
QX += EX.getQualifiers() - RQ;
QY += EY.getQualifiers() - RQ;
return R;
}
template <class T>
static QualType getCommonPointeeType(ASTContext &Ctx, const T *X, const T *Y) {
return Ctx.getCommonSugaredType(X->getPointeeType(), Y->getPointeeType());
}
template <class T> static auto *getCommonSizeExpr(ASTContext &Ctx, T *X, T *Y) {
assert(Ctx.hasSameExpr(X->getSizeExpr(), Y->getSizeExpr()));
return X->getSizeExpr();
}
static auto getCommonSizeModifier(const ArrayType *X, const ArrayType *Y) {
assert(X->getSizeModifier() == Y->getSizeModifier());
return X->getSizeModifier();
}
static auto getCommonIndexTypeCVRQualifiers(const ArrayType *X,
const ArrayType *Y) {
assert(X->getIndexTypeCVRQualifiers() == Y->getIndexTypeCVRQualifiers());
return X->getIndexTypeCVRQualifiers();
}
// Merges two type lists such that the resulting vector will contain
// each type (in a canonical sense) only once, in the order they appear
// from X to Y. If they occur in both X and Y, the result will contain
// the common sugared type between them.
static void mergeTypeLists(ASTContext &Ctx, SmallVectorImpl<QualType> &Out,
ArrayRef<QualType> X, ArrayRef<QualType> Y) {
llvm::DenseMap<QualType, unsigned> Found;
for (auto Ts : {X, Y}) {
for (QualType T : Ts) {
auto Res = Found.try_emplace(Ctx.getCanonicalType(T), Out.size());
if (!Res.second) {
QualType &U = Out[Res.first->second];
U = Ctx.getCommonSugaredType(U, T);
} else {
Out.emplace_back(T);
}
}
}
}
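// Illustrative example: merging X = {int, long} with Y = {long, char}
// yields {int, long, char}; the long that occurs in both lists is unified
// through getCommonSugaredType rather than appended twice.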
FunctionProtoType::ExceptionSpecInfo
ASTContext::mergeExceptionSpecs(FunctionProtoType::ExceptionSpecInfo ESI1,
FunctionProtoType::ExceptionSpecInfo ESI2,
SmallVectorImpl<QualType> &ExceptionTypeStorage,
bool AcceptDependent) {
ExceptionSpecificationType EST1 = ESI1.Type, EST2 = ESI2.Type;
// If either of them can throw anything, that is the result.
for (auto I : {EST_None, EST_MSAny, EST_NoexceptFalse}) {
if (EST1 == I)
return ESI1;
if (EST2 == I)
return ESI2;
}
// If either of them is non-throwing, the result is the other.
for (auto I :
{EST_NoThrow, EST_DynamicNone, EST_BasicNoexcept, EST_NoexceptTrue}) {
if (EST1 == I)
return ESI2;
if (EST2 == I)
return ESI1;
}
// If we're left with value-dependent computed noexcept expressions, we're
// stuck. Before C++17, we can just drop the exception specification entirely,
// since it's not actually part of the canonical type. And this should never
// happen in C++17, because it would mean we were computing the composite
// pointer type of dependent types, which should never happen.
if (EST1 == EST_DependentNoexcept || EST2 == EST_DependentNoexcept) {
assert(AcceptDependent &&
"computing composite pointer type of dependent types");
return FunctionProtoType::ExceptionSpecInfo();
}
// Switch over the possibilities so that people adding new values know to
// update this function.
switch (EST1) {
case EST_None:
case EST_DynamicNone:
case EST_MSAny:
case EST_BasicNoexcept:
case EST_DependentNoexcept:
case EST_NoexceptFalse:
case EST_NoexceptTrue:
case EST_NoThrow:
llvm_unreachable("These ESTs should be handled above");
case EST_Dynamic: {
// This is the fun case: both exception specifications are dynamic. Form
// the union of the two lists.
assert(EST2 == EST_Dynamic && "other cases should already be handled");
mergeTypeLists(*this, ExceptionTypeStorage, ESI1.Exceptions,
ESI2.Exceptions);
FunctionProtoType::ExceptionSpecInfo Result(EST_Dynamic);
Result.Exceptions = ExceptionTypeStorage;
return Result;
}
case EST_Unevaluated:
case EST_Uninstantiated:
case EST_Unparsed:
llvm_unreachable("shouldn't see unresolved exception specifications here");
}
llvm_unreachable("invalid ExceptionSpecificationType");
}
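// Illustrative example: merging 'throw(int)' with 'throw(float)' reaches
// the EST_Dynamic case and yields 'throw(int, float)', whereas merging
// 'noexcept(true)' with any other specification simply returns the other
// one via the non-throwing loop above.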
static QualType getCommonNonSugarTypeNode(ASTContext &Ctx, const Type *X,
Qualifiers &QX, const Type *Y,
Qualifiers &QY) {
Type::TypeClass TC = X->getTypeClass();
assert(TC == Y->getTypeClass());
switch (TC) {
#define UNEXPECTED_TYPE(Class, Kind) \
case Type::Class: \
llvm_unreachable("Unexpected " Kind ": " #Class);
#define NON_CANONICAL_TYPE(Class, Base) UNEXPECTED_TYPE(Class, "non-canonical")
#define TYPE(Class, Base)
#include "clang/AST/TypeNodes.inc"
#define SUGAR_FREE_TYPE(Class) UNEXPECTED_TYPE(Class, "sugar-free")
SUGAR_FREE_TYPE(Builtin)
SUGAR_FREE_TYPE(Decltype)
SUGAR_FREE_TYPE(DeducedTemplateSpecialization)
SUGAR_FREE_TYPE(DependentBitInt)
SUGAR_FREE_TYPE(Enum)
SUGAR_FREE_TYPE(BitInt)
SUGAR_FREE_TYPE(ObjCInterface)
SUGAR_FREE_TYPE(Record)
SUGAR_FREE_TYPE(SubstTemplateTypeParmPack)
SUGAR_FREE_TYPE(UnresolvedUsing)
#undef SUGAR_FREE_TYPE
#define NON_UNIQUE_TYPE(Class) UNEXPECTED_TYPE(Class, "non-unique")
NON_UNIQUE_TYPE(TypeOfExpr)
NON_UNIQUE_TYPE(VariableArray)
#undef NON_UNIQUE_TYPE
UNEXPECTED_TYPE(TypeOf, "sugar")
#undef UNEXPECTED_TYPE
case Type::Auto: {
const auto *AX = cast<AutoType>(X), *AY = cast<AutoType>(Y);
assert(AX->getDeducedType().isNull());
assert(AY->getDeducedType().isNull());
assert(AX->getKeyword() == AY->getKeyword());
assert(AX->isInstantiationDependentType() ==
AY->isInstantiationDependentType());
auto As = getCommonTemplateArguments(Ctx, AX->getTypeConstraintArguments(),
AY->getTypeConstraintArguments());
return Ctx.getAutoType(QualType(), AX->getKeyword(),
AX->isInstantiationDependentType(),
AX->containsUnexpandedParameterPack(),
getCommonDeclChecked(AX->getTypeConstraintConcept(),
AY->getTypeConstraintConcept()),
As);
}
case Type::IncompleteArray: {
const auto *AX = cast<IncompleteArrayType>(X),
*AY = cast<IncompleteArrayType>(Y);
return Ctx.getIncompleteArrayType(
getCommonArrayElementType(Ctx, AX, QX, AY, QY),
getCommonSizeModifier(AX, AY), getCommonIndexTypeCVRQualifiers(AX, AY));
}
case Type::DependentSizedArray: {
const auto *AX = cast<DependentSizedArrayType>(X),
*AY = cast<DependentSizedArrayType>(Y);
return Ctx.getDependentSizedArrayType(
getCommonArrayElementType(Ctx, AX, QX, AY, QY),
getCommonSizeExpr(Ctx, AX, AY), getCommonSizeModifier(AX, AY),
getCommonIndexTypeCVRQualifiers(AX, AY),
AX->getBracketsRange() == AY->getBracketsRange()
? AX->getBracketsRange()
: SourceRange());
}
case Type::ConstantArray: {
const auto *AX = cast<ConstantArrayType>(X),
*AY = cast<ConstantArrayType>(Y);
assert(AX->getSize() == AY->getSize());
const Expr *SizeExpr = Ctx.hasSameExpr(AX->getSizeExpr(), AY->getSizeExpr())
? AX->getSizeExpr()
: nullptr;
return Ctx.getConstantArrayType(
getCommonArrayElementType(Ctx, AX, QX, AY, QY), AX->getSize(), SizeExpr,
getCommonSizeModifier(AX, AY), getCommonIndexTypeCVRQualifiers(AX, AY));
}
case Type::Atomic: {
const auto *AX = cast<AtomicType>(X), *AY = cast<AtomicType>(Y);
return Ctx.getAtomicType(
Ctx.getCommonSugaredType(AX->getValueType(), AY->getValueType()));
}
case Type::Complex: {
const auto *CX = cast<ComplexType>(X), *CY = cast<ComplexType>(Y);
return Ctx.getComplexType(getCommonArrayElementType(Ctx, CX, QX, CY, QY));
}
case Type::Pointer: {
const auto *PX = cast<PointerType>(X), *PY = cast<PointerType>(Y);
return Ctx.getPointerType(getCommonPointeeType(Ctx, PX, PY));
}
case Type::BlockPointer: {
const auto *PX = cast<BlockPointerType>(X), *PY = cast<BlockPointerType>(Y);
return Ctx.getBlockPointerType(getCommonPointeeType(Ctx, PX, PY));
}
case Type::ObjCObjectPointer: {
const auto *PX = cast<ObjCObjectPointerType>(X),
*PY = cast<ObjCObjectPointerType>(Y);
return Ctx.getObjCObjectPointerType(getCommonPointeeType(Ctx, PX, PY));
}
case Type::MemberPointer: {
const auto *PX = cast<MemberPointerType>(X),
*PY = cast<MemberPointerType>(Y);
return Ctx.getMemberPointerType(
getCommonPointeeType(Ctx, PX, PY),
Ctx.getCommonSugaredType(QualType(PX->getClass(), 0),
QualType(PY->getClass(), 0))
.getTypePtr());
}
case Type::LValueReference: {
const auto *PX = cast<LValueReferenceType>(X),
*PY = cast<LValueReferenceType>(Y);
// FIXME: Preserve PointeeTypeAsWritten.
return Ctx.getLValueReferenceType(getCommonPointeeType(Ctx, PX, PY),
PX->isSpelledAsLValue() ||
PY->isSpelledAsLValue());
}
case Type::RValueReference: {
const auto *PX = cast<RValueReferenceType>(X),
*PY = cast<RValueReferenceType>(Y);
// FIXME: Preserve PointeeTypeAsWritten.
return Ctx.getRValueReferenceType(getCommonPointeeType(Ctx, PX, PY));
}
case Type::DependentAddressSpace: {
const auto *PX = cast<DependentAddressSpaceType>(X),
*PY = cast<DependentAddressSpaceType>(Y);
assert(Ctx.hasSameExpr(PX->getAddrSpaceExpr(), PY->getAddrSpaceExpr()));
return Ctx.getDependentAddressSpaceType(getCommonPointeeType(Ctx, PX, PY),
PX->getAddrSpaceExpr(),
getCommonAttrLoc(PX, PY));
}
case Type::FunctionNoProto: {
const auto *FX = cast<FunctionNoProtoType>(X),
*FY = cast<FunctionNoProtoType>(Y);
assert(FX->getExtInfo() == FY->getExtInfo());
return Ctx.getFunctionNoProtoType(
Ctx.getCommonSugaredType(FX->getReturnType(), FY->getReturnType()),
FX->getExtInfo());
}
case Type::FunctionProto: {
const auto *FX = cast<FunctionProtoType>(X),
*FY = cast<FunctionProtoType>(Y);
FunctionProtoType::ExtProtoInfo EPIX = FX->getExtProtoInfo(),
EPIY = FY->getExtProtoInfo();
assert(EPIX.ExtInfo == EPIY.ExtInfo);
assert(EPIX.ExtParameterInfos == EPIY.ExtParameterInfos);
assert(EPIX.RefQualifier == EPIY.RefQualifier);
assert(EPIX.TypeQuals == EPIY.TypeQuals);
assert(EPIX.Variadic == EPIY.Variadic);
// FIXME: Can we handle an empty EllipsisLoc?
// Use an empty EllipsisLoc if X and Y differ.
EPIX.HasTrailingReturn = EPIX.HasTrailingReturn && EPIY.HasTrailingReturn;
QualType R =
Ctx.getCommonSugaredType(FX->getReturnType(), FY->getReturnType());
auto P = getCommonTypes(Ctx, FX->param_types(), FY->param_types(),
/*Unqualified=*/true);
SmallVector<QualType, 8> Exceptions;
EPIX.ExceptionSpec = Ctx.mergeExceptionSpecs(
EPIX.ExceptionSpec, EPIY.ExceptionSpec, Exceptions, true);
return Ctx.getFunctionType(R, P, EPIX);
}
case Type::ObjCObject: {
const auto *OX = cast<ObjCObjectType>(X), *OY = cast<ObjCObjectType>(Y);
assert(
std::equal(OX->getProtocols().begin(), OX->getProtocols().end(),
OY->getProtocols().begin(), OY->getProtocols().end(),
[](const ObjCProtocolDecl *P0, const ObjCProtocolDecl *P1) {
return P0->getCanonicalDecl() == P1->getCanonicalDecl();
}) &&
"protocol lists must be the same");
auto TAs = getCommonTypes(Ctx, OX->getTypeArgsAsWritten(),
OY->getTypeArgsAsWritten());
return Ctx.getObjCObjectType(
Ctx.getCommonSugaredType(OX->getBaseType(), OY->getBaseType()), TAs,
OX->getProtocols(),
OX->isKindOfTypeAsWritten() && OY->isKindOfTypeAsWritten());
}
case Type::ConstantMatrix: {
const auto *MX = cast<ConstantMatrixType>(X),
*MY = cast<ConstantMatrixType>(Y);
assert(MX->getNumRows() == MY->getNumRows());
assert(MX->getNumColumns() == MY->getNumColumns());
return Ctx.getConstantMatrixType(getCommonElementType(Ctx, MX, MY),
MX->getNumRows(), MX->getNumColumns());
}
case Type::DependentSizedMatrix: {
const auto *MX = cast<DependentSizedMatrixType>(X),
*MY = cast<DependentSizedMatrixType>(Y);
assert(Ctx.hasSameExpr(MX->getRowExpr(), MY->getRowExpr()));
assert(Ctx.hasSameExpr(MX->getColumnExpr(), MY->getColumnExpr()));
return Ctx.getDependentSizedMatrixType(
getCommonElementType(Ctx, MX, MY), MX->getRowExpr(),
MX->getColumnExpr(), getCommonAttrLoc(MX, MY));
}
case Type::Vector: {
const auto *VX = cast<VectorType>(X), *VY = cast<VectorType>(Y);
assert(VX->getNumElements() == VY->getNumElements());
assert(VX->getVectorKind() == VY->getVectorKind());
return Ctx.getVectorType(getCommonElementType(Ctx, VX, VY),
VX->getNumElements(), VX->getVectorKind());
}
case Type::ExtVector: {
const auto *VX = cast<ExtVectorType>(X), *VY = cast<ExtVectorType>(Y);
assert(VX->getNumElements() == VY->getNumElements());
return Ctx.getExtVectorType(getCommonElementType(Ctx, VX, VY),
VX->getNumElements());
}
case Type::DependentSizedExtVector: {
const auto *VX = cast<DependentSizedExtVectorType>(X),
*VY = cast<DependentSizedExtVectorType>(Y);
return Ctx.getDependentSizedExtVectorType(getCommonElementType(Ctx, VX, VY),
getCommonSizeExpr(Ctx, VX, VY),
getCommonAttrLoc(VX, VY));
}
case Type::DependentVector: {
const auto *VX = cast<DependentVectorType>(X),
*VY = cast<DependentVectorType>(Y);
assert(VX->getVectorKind() == VY->getVectorKind());
return Ctx.getDependentVectorType(
getCommonElementType(Ctx, VX, VY), getCommonSizeExpr(Ctx, VX, VY),
getCommonAttrLoc(VX, VY), VX->getVectorKind());
}
case Type::InjectedClassName: {
const auto *IX = cast<InjectedClassNameType>(X),
*IY = cast<InjectedClassNameType>(Y);
return Ctx.getInjectedClassNameType(
getCommonDeclChecked(IX->getDecl(), IY->getDecl()),
Ctx.getCommonSugaredType(IX->getInjectedSpecializationType(),
IY->getInjectedSpecializationType()));
}
case Type::TemplateSpecialization: {
const auto *TX = cast<TemplateSpecializationType>(X),
*TY = cast<TemplateSpecializationType>(Y);
auto As = getCommonTemplateArguments(Ctx, TX->template_arguments(),
TY->template_arguments());
return Ctx.getTemplateSpecializationType(
::getCommonTemplateNameChecked(Ctx, TX->getTemplateName(),
TY->getTemplateName()),
As, X->getCanonicalTypeInternal());
}
case Type::DependentName: {
const auto *NX = cast<DependentNameType>(X),
*NY = cast<DependentNameType>(Y);
assert(NX->getIdentifier() == NY->getIdentifier());
return Ctx.getDependentNameType(
getCommonTypeKeyword(NX, NY), getCommonNNS(Ctx, NX, NY),
NX->getIdentifier(), NX->getCanonicalTypeInternal());
}
case Type::DependentTemplateSpecialization: {
const auto *TX = cast<DependentTemplateSpecializationType>(X),
*TY = cast<DependentTemplateSpecializationType>(Y);
assert(TX->getIdentifier() == TY->getIdentifier());
auto As = getCommonTemplateArguments(Ctx, TX->template_arguments(),
TY->template_arguments());
return Ctx.getDependentTemplateSpecializationType(
getCommonTypeKeyword(TX, TY), getCommonNNS(Ctx, TX, TY),
TX->getIdentifier(), As);
}
case Type::UnaryTransform: {
const auto *TX = cast<UnaryTransformType>(X),
*TY = cast<UnaryTransformType>(Y);
assert(TX->getUTTKind() == TY->getUTTKind());
return Ctx.getUnaryTransformType(
Ctx.getCommonSugaredType(TX->getBaseType(), TY->getBaseType()),
Ctx.getCommonSugaredType(TX->getUnderlyingType(),
TY->getUnderlyingType()),
TX->getUTTKind());
}
case Type::PackExpansion: {
const auto *PX = cast<PackExpansionType>(X),
*PY = cast<PackExpansionType>(Y);
assert(PX->getNumExpansions() == PY->getNumExpansions());
return Ctx.getPackExpansionType(
Ctx.getCommonSugaredType(PX->getPattern(), PY->getPattern()),
PX->getNumExpansions(), /*ExpectPackInType=*/false);
}
case Type::Pipe: {
const auto *PX = cast<PipeType>(X), *PY = cast<PipeType>(Y);
assert(PX->isReadOnly() == PY->isReadOnly());
auto MP = PX->isReadOnly() ? &ASTContext::getReadPipeType
: &ASTContext::getWritePipeType;
return (Ctx.*MP)(getCommonElementType(Ctx, PX, PY));
}
case Type::TemplateTypeParm: {
const auto *TX = cast<TemplateTypeParmType>(X),
*TY = cast<TemplateTypeParmType>(Y);
assert(TX->getDepth() == TY->getDepth());
assert(TX->getIndex() == TY->getIndex());
assert(TX->isParameterPack() == TY->isParameterPack());
return Ctx.getTemplateTypeParmType(
TX->getDepth(), TX->getIndex(), TX->isParameterPack(),
getCommonDecl(TX->getDecl(), TY->getDecl()));
}
}
llvm_unreachable("Unknown Type Class");
}
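// Illustrative example: 'int *' and 'MyInt *', with a hypothetical
// 'typedef int MyInt;', meet here at the Pointer case, which recurses into
// getCommonSugaredType on the pointees and rebuilds a pointer to the
// common 'int'.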
static QualType getCommonSugarTypeNode(ASTContext &Ctx, const Type *X,
const Type *Y,
SplitQualType Underlying) {
Type::TypeClass TC = X->getTypeClass();
if (TC != Y->getTypeClass())
return QualType();
switch (TC) {
#define UNEXPECTED_TYPE(Class, Kind) \
case Type::Class: \
llvm_unreachable("Unexpected " Kind ": " #Class);
#define TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) UNEXPECTED_TYPE(Class, "dependent")
#include "clang/AST/TypeNodes.inc"
#define CANONICAL_TYPE(Class) UNEXPECTED_TYPE(Class, "canonical")
CANONICAL_TYPE(Atomic)
CANONICAL_TYPE(BitInt)
CANONICAL_TYPE(BlockPointer)
CANONICAL_TYPE(Builtin)
CANONICAL_TYPE(Complex)
CANONICAL_TYPE(ConstantArray)
CANONICAL_TYPE(ConstantMatrix)
CANONICAL_TYPE(Enum)
CANONICAL_TYPE(ExtVector)
CANONICAL_TYPE(FunctionNoProto)
CANONICAL_TYPE(FunctionProto)
CANONICAL_TYPE(IncompleteArray)
CANONICAL_TYPE(LValueReference)
CANONICAL_TYPE(MemberPointer)
CANONICAL_TYPE(ObjCInterface)
CANONICAL_TYPE(ObjCObject)
CANONICAL_TYPE(ObjCObjectPointer)
CANONICAL_TYPE(Pipe)
CANONICAL_TYPE(Pointer)
CANONICAL_TYPE(Record)
CANONICAL_TYPE(RValueReference)
CANONICAL_TYPE(VariableArray)
CANONICAL_TYPE(Vector)
#undef CANONICAL_TYPE
#undef UNEXPECTED_TYPE
case Type::Adjusted: {
const auto *AX = cast<AdjustedType>(X), *AY = cast<AdjustedType>(Y);
QualType OX = AX->getOriginalType(), OY = AY->getOriginalType();
if (!Ctx.hasSameType(OX, OY))
return QualType();
// FIXME: It's inefficient to have to unify the original types.
return Ctx.getAdjustedType(Ctx.getCommonSugaredType(OX, OY),
Ctx.getQualifiedType(Underlying));
}
case Type::Decayed: {
const auto *DX = cast<DecayedType>(X), *DY = cast<DecayedType>(Y);
QualType OX = DX->getOriginalType(), OY = DY->getOriginalType();
if (!Ctx.hasSameType(OX, OY))
return QualType();
// FIXME: It's inefficient to have to unify the original types.
return Ctx.getDecayedType(Ctx.getCommonSugaredType(OX, OY),
Ctx.getQualifiedType(Underlying));
}
case Type::Attributed: {
const auto *AX = cast<AttributedType>(X), *AY = cast<AttributedType>(Y);
AttributedType::Kind Kind = AX->getAttrKind();
if (Kind != AY->getAttrKind())
return QualType();
QualType MX = AX->getModifiedType(), MY = AY->getModifiedType();
if (!Ctx.hasSameType(MX, MY))
return QualType();
// FIXME: It's inefficient to have to unify the modified types.
return Ctx.getAttributedType(Kind, Ctx.getCommonSugaredType(MX, MY),
Ctx.getQualifiedType(Underlying));
}
case Type::BTFTagAttributed: {
const auto *BX = cast<BTFTagAttributedType>(X);
const BTFTypeTagAttr *AX = BX->getAttr();
// The attribute is not uniqued, so just compare the tag.
if (AX->getBTFTypeTag() !=
cast<BTFTagAttributedType>(Y)->getAttr()->getBTFTypeTag())
return QualType();
return Ctx.getBTFTagAttributedType(AX, Ctx.getQualifiedType(Underlying));
}
case Type::Auto: {
const auto *AX = cast<AutoType>(X), *AY = cast<AutoType>(Y);
AutoTypeKeyword KW = AX->getKeyword();
if (KW != AY->getKeyword())
return QualType();
ConceptDecl *CD = ::getCommonDecl(AX->getTypeConstraintConcept(),
AY->getTypeConstraintConcept());
SmallVector<TemplateArgument, 8> As;
if (CD &&
getCommonTemplateArguments(Ctx, As, AX->getTypeConstraintArguments(),
AY->getTypeConstraintArguments()))
CD = nullptr; // The arguments differ, so make it unconstrained.
// Neither auto type can be dependent; otherwise they would not have been
// sugar. This implies they cannot contain unexpanded packs either.
return Ctx.getAutoType(Ctx.getQualifiedType(Underlying), AX->getKeyword(),
/*IsDependent=*/false, /*IsPack=*/false, CD, As);
}
case Type::Decltype:
return QualType();
case Type::DeducedTemplateSpecialization:
// FIXME: Try to merge these.
return QualType();
case Type::Elaborated: {
const auto *EX = cast<ElaboratedType>(X), *EY = cast<ElaboratedType>(Y);
return Ctx.getElaboratedType(
::getCommonTypeKeyword(EX, EY), ::getCommonNNS(Ctx, EX, EY),
Ctx.getQualifiedType(Underlying),
::getCommonDecl(EX->getOwnedTagDecl(), EY->getOwnedTagDecl()));
}
case Type::MacroQualified: {
const auto *MX = cast<MacroQualifiedType>(X),
*MY = cast<MacroQualifiedType>(Y);
const IdentifierInfo *IX = MX->getMacroIdentifier();
if (IX != MY->getMacroIdentifier())
return QualType();
return Ctx.getMacroQualifiedType(Ctx.getQualifiedType(Underlying), IX);
}
case Type::SubstTemplateTypeParm: {
const auto *SX = cast<SubstTemplateTypeParmType>(X),
*SY = cast<SubstTemplateTypeParmType>(Y);
Decl *CD =
::getCommonDecl(SX->getAssociatedDecl(), SY->getAssociatedDecl());
if (!CD)
return QualType();
unsigned Index = SX->getIndex();
if (Index != SY->getIndex())
return QualType();
auto PackIndex = SX->getPackIndex();
if (PackIndex != SY->getPackIndex())
return QualType();
return Ctx.getSubstTemplateTypeParmType(Ctx.getQualifiedType(Underlying),
CD, Index, PackIndex);
}
case Type::ObjCTypeParam:
// FIXME: Try to merge these.
return QualType();
case Type::Paren:
return Ctx.getParenType(Ctx.getQualifiedType(Underlying));
case Type::TemplateSpecialization: {
const auto *TX = cast<TemplateSpecializationType>(X),
*TY = cast<TemplateSpecializationType>(Y);
TemplateName CTN = ::getCommonTemplateName(Ctx, TX->getTemplateName(),
TY->getTemplateName());
if (!CTN.getAsVoidPointer())
return QualType();
SmallVector<TemplateArgument, 8> Args;
if (getCommonTemplateArguments(Ctx, Args, TX->template_arguments(),
TY->template_arguments()))
return QualType();
return Ctx.getTemplateSpecializationType(CTN, Args,
Ctx.getQualifiedType(Underlying));
}
case Type::Typedef: {
const auto *TX = cast<TypedefType>(X), *TY = cast<TypedefType>(Y);
const TypedefNameDecl *CD = ::getCommonDecl(TX->getDecl(), TY->getDecl());
if (!CD)
return QualType();
return Ctx.getTypedefType(CD, Ctx.getQualifiedType(Underlying));
}
case Type::TypeOf: {
// For the common sugar between two typeof expressions, where one is
// potentially a typeof_unqual and the other is not, we unify to the
// qualified type, as that retains the most information along with the
// type. We only return a typeof_unqual type when both inputs are
// typeof_unqual.
TypeOfKind Kind = TypeOfKind::Qualified;
if (cast<TypeOfType>(X)->getKind() == cast<TypeOfType>(Y)->getKind() &&
cast<TypeOfType>(X)->getKind() == TypeOfKind::Unqualified)
Kind = TypeOfKind::Unqualified;
return Ctx.getTypeOfType(Ctx.getQualifiedType(Underlying), Kind);
}
case Type::TypeOfExpr:
return QualType();
case Type::UnaryTransform: {
const auto *UX = cast<UnaryTransformType>(X),
*UY = cast<UnaryTransformType>(Y);
UnaryTransformType::UTTKind KX = UX->getUTTKind();
if (KX != UY->getUTTKind())
return QualType();
QualType BX = UX->getBaseType(), BY = UY->getBaseType();
if (!Ctx.hasSameType(BX, BY))
return QualType();
// FIXME: It's inefficient to have to unify the base types.
return Ctx.getUnaryTransformType(Ctx.getCommonSugaredType(BX, BY),
Ctx.getQualifiedType(Underlying), KX);
}
case Type::Using: {
const auto *UX = cast<UsingType>(X), *UY = cast<UsingType>(Y);
const UsingShadowDecl *CD =
::getCommonDecl(UX->getFoundDecl(), UY->getFoundDecl());
if (!CD)
return QualType();
return Ctx.getUsingType(CD, Ctx.getQualifiedType(Underlying));
}
}
llvm_unreachable("Unhandled Type Class");
}
static auto unwrapSugar(SplitQualType &T, Qualifiers &QTotal) {
SmallVector<SplitQualType, 8> R;
while (true) {
QTotal += T.Quals;
QualType NT = T.Ty->getLocallyUnqualifiedSingleStepDesugaredType();
if (NT == QualType(T.Ty, 0))
break;
R.push_back(T);
T = NT.split();
}
return R;
}
QualType ASTContext::getCommonSugaredType(QualType X, QualType Y,
bool Unqualified) {
assert(Unqualified ? hasSameUnqualifiedType(X, Y) : hasSameType(X, Y));
if (X == Y)
return X;
if (!Unqualified) {
if (X.isCanonical())
return X;
if (Y.isCanonical())
return Y;
}
SplitQualType SX = X.split(), SY = Y.split();
Qualifiers QX, QY;
// Desugar SX and SY, setting the sugar and qualifiers aside into Xs and Ys,
// until we reach their underlying "canonical nodes". Note these are not
// necessarily canonical types, as they may still have sugared properties.
// QX and QY will store the sum of all qualifiers in Xs and Ys respectively.
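// For example, given the same canonical type spelled once through a typedef
// and once directly, the canonical nodes below will match but no sugar is
// common, so the result is the shared canonical node with the common
// qualifiers.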
auto Xs = ::unwrapSugar(SX, QX), Ys = ::unwrapSugar(SY, QY);
if (SX.Ty != SY.Ty) {
// The canonical nodes differ. Build a common canonical node out of the two,
// unifying their sugar. This may recurse back here.
SX.Ty =
::getCommonNonSugarTypeNode(*this, SX.Ty, QX, SY.Ty, QY).getTypePtr();
} else {
// The canonical nodes were identical: We may have desugared too much.
// Add any common sugar back in.
while (!Xs.empty() && !Ys.empty() && Xs.back().Ty == Ys.back().Ty) {
QX -= SX.Quals;
QY -= SY.Quals;
SX = Xs.pop_back_val();
SY = Ys.pop_back_val();
}
}
if (Unqualified)
QX = Qualifiers::removeCommonQualifiers(QX, QY);
else
assert(QX == QY);
// Even though the remaining sugar nodes in Xs and Ys differ, some may be
// related. Walk up these nodes, unifying them and adding the result.
while (!Xs.empty() && !Ys.empty()) {
auto Underlying = SplitQualType(
SX.Ty, Qualifiers::removeCommonQualifiers(SX.Quals, SY.Quals));
SX = Xs.pop_back_val();
SY = Ys.pop_back_val();
SX.Ty = ::getCommonSugarTypeNode(*this, SX.Ty, SY.Ty, Underlying)
.getTypePtrOrNull();
// Stop at the first pair which is unrelated.
if (!SX.Ty) {
SX.Ty = Underlying.Ty;
break;
}
QX -= Underlying.Quals;
};
// Add back the missing accumulated qualifiers, which were stripped off
// with the sugar nodes we could not unify.
QualType R = getQualifiedType(SX.Ty, QX);
assert(Unqualified ? hasSameUnqualifiedType(R, X) : hasSameType(R, X));
return R;
}
QualType ASTContext::getCorrespondingSaturatedType(QualType Ty) const {
assert(Ty->isFixedPointType());
if (Ty->isSaturatedFixedPointType()) return Ty;
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
return SatShortAccumTy;
case BuiltinType::Accum:
return SatAccumTy;
case BuiltinType::LongAccum:
return SatLongAccumTy;
case BuiltinType::UShortAccum:
return SatUnsignedShortAccumTy;
case BuiltinType::UAccum:
return SatUnsignedAccumTy;
case BuiltinType::ULongAccum:
return SatUnsignedLongAccumTy;
case BuiltinType::ShortFract:
return SatShortFractTy;
case BuiltinType::Fract:
return SatFractTy;
case BuiltinType::LongFract:
return SatLongFractTy;
case BuiltinType::UShortFract:
return SatUnsignedShortFractTy;
case BuiltinType::UFract:
return SatUnsignedFractTy;
case BuiltinType::ULongFract:
return SatUnsignedLongFractTy;
}
}
LangAS ASTContext::getLangASForBuiltinAddressSpace(unsigned AS) const {
if (LangOpts.OpenCL)
return getTargetInfo().getOpenCLBuiltinAddressSpace(AS);
if (LangOpts.CUDA)
return getTargetInfo().getCUDABuiltinAddressSpace(AS);
return getLangASFromTargetAS(AS);
}
// Explicitly instantiate this in case a Redeclarable<T> is used from a TU that
// doesn't include ASTContext.h
template
clang::LazyGenerationalUpdatePtr<
const Decl *, Decl *, &ExternalASTSource::CompleteRedeclChain>::ValueType
clang::LazyGenerationalUpdatePtr<
const Decl *, Decl *, &ExternalASTSource::CompleteRedeclChain>::makeValue(
const clang::ASTContext &Ctx, Decl *Value);
unsigned char ASTContext::getFixedPointScale(QualType Ty) const {
assert(Ty->isFixedPointType());
const TargetInfo &Target = getTargetInfo();
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
case BuiltinType::SatShortAccum:
return Target.getShortAccumScale();
case BuiltinType::Accum:
case BuiltinType::SatAccum:
return Target.getAccumScale();
case BuiltinType::LongAccum:
case BuiltinType::SatLongAccum:
return Target.getLongAccumScale();
case BuiltinType::UShortAccum:
case BuiltinType::SatUShortAccum:
return Target.getUnsignedShortAccumScale();
case BuiltinType::UAccum:
case BuiltinType::SatUAccum:
return Target.getUnsignedAccumScale();
case BuiltinType::ULongAccum:
case BuiltinType::SatULongAccum:
return Target.getUnsignedLongAccumScale();
case BuiltinType::ShortFract:
case BuiltinType::SatShortFract:
return Target.getShortFractScale();
case BuiltinType::Fract:
case BuiltinType::SatFract:
return Target.getFractScale();
case BuiltinType::LongFract:
case BuiltinType::SatLongFract:
return Target.getLongFractScale();
case BuiltinType::UShortFract:
case BuiltinType::SatUShortFract:
return Target.getUnsignedShortFractScale();
case BuiltinType::UFract:
case BuiltinType::SatUFract:
return Target.getUnsignedFractScale();
case BuiltinType::ULongFract:
case BuiltinType::SatULongFract:
return Target.getUnsignedLongFractScale();
}
}
unsigned char ASTContext::getFixedPointIBits(QualType Ty) const {
assert(Ty->isFixedPointType());
const TargetInfo &Target = getTargetInfo();
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
case BuiltinType::SatShortAccum:
return Target.getShortAccumIBits();
case BuiltinType::Accum:
case BuiltinType::SatAccum:
return Target.getAccumIBits();
case BuiltinType::LongAccum:
case BuiltinType::SatLongAccum:
return Target.getLongAccumIBits();
case BuiltinType::UShortAccum:
case BuiltinType::SatUShortAccum:
return Target.getUnsignedShortAccumIBits();
case BuiltinType::UAccum:
case BuiltinType::SatUAccum:
return Target.getUnsignedAccumIBits();
case BuiltinType::ULongAccum:
case BuiltinType::SatULongAccum:
return Target.getUnsignedLongAccumIBits();
case BuiltinType::ShortFract:
case BuiltinType::SatShortFract:
case BuiltinType::Fract:
case BuiltinType::SatFract:
case BuiltinType::LongFract:
case BuiltinType::SatLongFract:
case BuiltinType::UShortFract:
case BuiltinType::SatUShortFract:
case BuiltinType::UFract:
case BuiltinType::SatUFract:
case BuiltinType::ULongFract:
case BuiltinType::SatULongFract:
return 0;
}
}
llvm::FixedPointSemantics
ASTContext::getFixedPointSemantics(QualType Ty) const {
assert((Ty->isFixedPointType() || Ty->isIntegerType()) &&
"Can only get the fixed point semantics for a "
"fixed point or integer type.");
if (Ty->isIntegerType())
return llvm::FixedPointSemantics::GetIntegerSemantics(
getIntWidth(Ty), Ty->isSignedIntegerType());
bool isSigned = Ty->isSignedFixedPointType();
return llvm::FixedPointSemantics(
static_cast<unsigned>(getTypeSize(Ty)), getFixedPointScale(Ty), isSigned,
Ty->isSaturatedFixedPointType(),
!isSigned && getTargetInfo().doUnsignedFixedPointTypesHavePadding());
}
llvm::APFixedPoint ASTContext::getFixedPointMax(QualType Ty) const {
assert(Ty->isFixedPointType());
return llvm::APFixedPoint::getMax(getFixedPointSemantics(Ty));
}
llvm::APFixedPoint ASTContext::getFixedPointMin(QualType Ty) const {
assert(Ty->isFixedPointType());
return llvm::APFixedPoint::getMin(getFixedPointSemantics(Ty));
}
QualType ASTContext::getCorrespondingSignedFixedPointType(QualType Ty) const {
assert(Ty->isUnsignedFixedPointType() &&
"Expected unsigned fixed point type");
switch (Ty->castAs<BuiltinType>()->getKind()) {
case BuiltinType::UShortAccum:
return ShortAccumTy;
case BuiltinType::UAccum:
return AccumTy;
case BuiltinType::ULongAccum:
return LongAccumTy;
case BuiltinType::SatUShortAccum:
return SatShortAccumTy;
case BuiltinType::SatUAccum:
return SatAccumTy;
case BuiltinType::SatULongAccum:
return SatLongAccumTy;
case BuiltinType::UShortFract:
return ShortFractTy;
case BuiltinType::UFract:
return FractTy;
case BuiltinType::ULongFract:
return LongFractTy;
case BuiltinType::SatUShortFract:
return SatShortFractTy;
case BuiltinType::SatUFract:
return SatFractTy;
case BuiltinType::SatULongFract:
return SatLongFractTy;
default:
llvm_unreachable("Unexpected unsigned fixed point type");
}
}
std::vector<std::string> ASTContext::filterFunctionTargetVersionAttrs(
const TargetVersionAttr *TV) const {
assert(TV != nullptr);
llvm::SmallVector<StringRef, 8> Feats;
std::vector<std::string> ResFeats;
TV->getFeatures(Feats);
for (auto &Feature : Feats)
if (Target->validateCpuSupports(Feature.str()))
ResFeats.push_back("?" + Feature.str());
return ResFeats;
}
ParsedTargetAttr
ASTContext::filterFunctionTargetAttrs(const TargetAttr *TD) const {
assert(TD != nullptr);
ParsedTargetAttr ParsedAttr = Target->parseTargetAttr(TD->getFeaturesStr());
llvm::erase_if(ParsedAttr.Features, [&](const std::string &Feat) {
return !Target->isValidFeatureName(StringRef{Feat}.substr(1));
});
return ParsedAttr;
}
void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
const FunctionDecl *FD) const {
if (FD)
getFunctionFeatureMap(FeatureMap, GlobalDecl().getWithDecl(FD));
else
Target->initFeatureMap(FeatureMap, getDiagnostics(),
Target->getTargetOpts().CPU,
Target->getTargetOpts().Features);
}
// Fills in the supplied string map with the set of target features for the
// passed in function.
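// For example, for a function marked __attribute__((target("avx2"))) on x86,
// the map contains the command-line features plus "avx2" mapped to true.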
void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
GlobalDecl GD) const {
StringRef TargetCPU = Target->getTargetOpts().CPU;
const FunctionDecl *FD = GD.getDecl()->getAsFunction();
if (const auto *TD = FD->getAttr<TargetAttr>()) {
ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD);
// Prepend a copy of the features passed on the command line so that the
// additional features from the function attribute can override them.
ParsedAttr.Features.insert(
ParsedAttr.Features.begin(),
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
if (ParsedAttr.CPU != "" && Target->isValidCPUName(ParsedAttr.CPU))
TargetCPU = ParsedAttr.CPU;
// Now populate the feature map, first with the TargetCPU which is either
// the default or a new one from the target attribute string. Then we'll use
// the passed in features (FeaturesAsWritten) along with the new ones from
// the attribute.
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU,
ParsedAttr.Features);
} else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
llvm::SmallVector<StringRef, 32> FeaturesTmp;
Target->getCPUSpecificCPUDispatchFeatures(
SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp);
std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
Features.insert(Features.begin(),
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else if (const auto *TC = FD->getAttr<TargetClonesAttr>()) {
std::vector<std::string> Features;
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (Target->getTriple().isAArch64()) {
// TargetClones for AArch64
if (VersionStr != "default") {
SmallVector<StringRef, 1> VersionFeatures;
VersionStr.split(VersionFeatures, "+");
for (auto &VFeature : VersionFeatures) {
VFeature = VFeature.trim();
Features.push_back((StringRef{"?"} + VFeature).str());
}
}
Features.insert(Features.begin(),
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
} else {
if (VersionStr.startswith("arch="))
TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
else if (VersionStr != "default")
Features.push_back((StringRef{"+"} + VersionStr).str());
}
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else if (const auto *TV = FD->getAttr<TargetVersionAttr>()) {
std::vector<std::string> Feats = filterFunctionTargetVersionAttrs(TV);
Feats.insert(Feats.begin(),
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Feats);
} else {
FeatureMap = Target->getTargetOpts().FeatureMap;
}
}
OMPTraitInfo &ASTContext::getNewOMPTraitInfo() {
OMPTraitInfoVector.emplace_back(new OMPTraitInfo());
return *OMPTraitInfoVector.back();
}
const StreamingDiagnostic &clang::
operator<<(const StreamingDiagnostic &DB,
const ASTContext::SectionInfo &Section) {
if (Section.Decl)
return DB << Section.Decl;
return DB << "a prior #pragma section";
}
bool ASTContext::mayExternalize(const Decl *D) const {
bool IsStaticVar =
isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
!D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
(D->hasAttr<CUDAConstantAttr>() &&
!D->getAttr<CUDAConstantAttr>()->isImplicit());
// CUDA/HIP: static managed variables need to be externalized since they are
// declarations in IR and therefore cannot have internal linkage. Kernels in
// an anonymous namespace need to be externalized to avoid duplicate symbols.
return (IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
(D->hasAttr<CUDAGlobalAttr>() &&
basicGVALinkageForFunction(*this, cast<FunctionDecl>(D)) ==
GVA_Internal);
}
bool ASTContext::shouldExternalize(const Decl *D) const {
return mayExternalize(D) &&
(D->hasAttr<HIPManagedAttr>() || D->hasAttr<CUDAGlobalAttr>() ||
CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
}
StringRef ASTContext::getCUIDHash() const {
if (!CUIDHash.empty())
return CUIDHash;
if (LangOpts.CUID.empty())
return StringRef();
CUIDHash = llvm::utohexstr(llvm::MD5Hash(LangOpts.CUID), /*LowerCase=*/true);
return CUIDHash;
}
diff --git a/contrib/llvm-project/clang/lib/AST/ExprConcepts.cpp b/contrib/llvm-project/clang/lib/AST/ExprConcepts.cpp
index fc8f1eb2abf1..cdc13c2d3969 100644
--- a/contrib/llvm-project/clang/lib/AST/ExprConcepts.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ExprConcepts.cpp
@@ -1,186 +1,197 @@
//===- ExprConcepts.cpp - C++ Concepts expression AST Node Implementation -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the subclasses of Expr declared in ExprConcepts.h
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ExprConcepts.h"
#include "clang/AST/ASTConcept.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ComputeDependence.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/DependenceFlags.h"
#include "clang/AST/Expr.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/Type.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/Support/TrailingObjects.h"
#include <algorithm>
#include <string>
#include <utility>
using namespace clang;
ConceptSpecializationExpr::ConceptSpecializationExpr(
const ASTContext &C, NestedNameSpecifierLoc NNS,
SourceLocation TemplateKWLoc, DeclarationNameInfo ConceptNameInfo,
NamedDecl *FoundDecl, ConceptDecl *NamedConcept,
const ASTTemplateArgumentListInfo *ArgsAsWritten,
ImplicitConceptSpecializationDecl *SpecDecl,
const ConstraintSatisfaction *Satisfaction)
: Expr(ConceptSpecializationExprClass, C.BoolTy, VK_PRValue, OK_Ordinary),
ConceptReference(NNS, TemplateKWLoc, ConceptNameInfo, FoundDecl,
NamedConcept, ArgsAsWritten),
SpecDecl(SpecDecl),
Satisfaction(Satisfaction
? ASTConstraintSatisfaction::Create(C, *Satisfaction)
: nullptr) {
setDependence(computeDependence(this, /*ValueDependent=*/!Satisfaction));
// Currently guaranteed by the fact that concepts can only be declared at
// namespace scope.
assert(!NestedNameSpec ||
(!NestedNameSpec.getNestedNameSpecifier()->isInstantiationDependent() &&
!NestedNameSpec.getNestedNameSpecifier()
->containsUnexpandedParameterPack()));
assert((!isValueDependent() || isInstantiationDependent()) &&
"should not be value-dependent");
}
ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty)
: Expr(ConceptSpecializationExprClass, Empty) {}
+ConceptSpecializationExpr *ConceptSpecializationExpr::Create(
+ const ASTContext &C, ConceptDecl *NamedConcept,
+ ImplicitConceptSpecializationDecl *SpecDecl,
+ const ConstraintSatisfaction *Satisfaction, bool Dependent,
+ bool ContainsUnexpandedParameterPack) {
+ return Create(C, NamedConcept, /*ArgsAsWritten*/ nullptr, SpecDecl, Satisfaction,
+ Dependent, ContainsUnexpandedParameterPack);
+}
+
ConceptSpecializationExpr *ConceptSpecializationExpr::Create(
const ASTContext &C, NestedNameSpecifierLoc NNS,
SourceLocation TemplateKWLoc, DeclarationNameInfo ConceptNameInfo,
NamedDecl *FoundDecl, ConceptDecl *NamedConcept,
const ASTTemplateArgumentListInfo *ArgsAsWritten,
ImplicitConceptSpecializationDecl *SpecDecl,
const ConstraintSatisfaction *Satisfaction) {
return new (C) ConceptSpecializationExpr(
C, NNS, TemplateKWLoc, ConceptNameInfo, FoundDecl, NamedConcept,
ArgsAsWritten, SpecDecl, Satisfaction);
}
ConceptSpecializationExpr::ConceptSpecializationExpr(
const ASTContext &C, ConceptDecl *NamedConcept,
+ const ASTTemplateArgumentListInfo *ArgsAsWritten,
ImplicitConceptSpecializationDecl *SpecDecl,
const ConstraintSatisfaction *Satisfaction, bool Dependent,
bool ContainsUnexpandedParameterPack)
: Expr(ConceptSpecializationExprClass, C.BoolTy, VK_PRValue, OK_Ordinary),
ConceptReference(NestedNameSpecifierLoc(), SourceLocation(),
DeclarationNameInfo(), NamedConcept, NamedConcept,
- nullptr),
+ ArgsAsWritten),
SpecDecl(SpecDecl),
Satisfaction(Satisfaction
? ASTConstraintSatisfaction::Create(C, *Satisfaction)
: nullptr) {
ExprDependence D = ExprDependence::None;
if (!Satisfaction)
D |= ExprDependence::Value;
if (Dependent)
D |= ExprDependence::Instantiation;
if (ContainsUnexpandedParameterPack)
D |= ExprDependence::UnexpandedPack;
setDependence(D);
}
ConceptSpecializationExpr *ConceptSpecializationExpr::Create(
const ASTContext &C, ConceptDecl *NamedConcept,
+ const ASTTemplateArgumentListInfo *ArgsAsWritten,
ImplicitConceptSpecializationDecl *SpecDecl,
const ConstraintSatisfaction *Satisfaction, bool Dependent,
bool ContainsUnexpandedParameterPack) {
- return new (C)
- ConceptSpecializationExpr(C, NamedConcept, SpecDecl, Satisfaction,
- Dependent, ContainsUnexpandedParameterPack);
+ return new (C) ConceptSpecializationExpr(C, NamedConcept, ArgsAsWritten,
+ SpecDecl, Satisfaction, Dependent,
+ ContainsUnexpandedParameterPack);
}
const TypeConstraint *
concepts::ExprRequirement::ReturnTypeRequirement::getTypeConstraint() const {
assert(isTypeConstraint());
auto TPL =
TypeConstraintInfo.getPointer().get<TemplateParameterList *>();
return cast<TemplateTypeParmDecl>(TPL->getParam(0))
->getTypeConstraint();
}
RequiresExpr::RequiresExpr(ASTContext &C, SourceLocation RequiresKWLoc,
RequiresExprBodyDecl *Body,
ArrayRef<ParmVarDecl *> LocalParameters,
ArrayRef<concepts::Requirement *> Requirements,
SourceLocation RBraceLoc)
: Expr(RequiresExprClass, C.BoolTy, VK_PRValue, OK_Ordinary),
NumLocalParameters(LocalParameters.size()),
NumRequirements(Requirements.size()), Body(Body), RBraceLoc(RBraceLoc) {
RequiresExprBits.IsSatisfied = false;
RequiresExprBits.RequiresKWLoc = RequiresKWLoc;
bool Dependent = false;
bool ContainsUnexpandedParameterPack = false;
for (ParmVarDecl *P : LocalParameters) {
Dependent |= P->getType()->isInstantiationDependentType();
ContainsUnexpandedParameterPack |=
P->getType()->containsUnexpandedParameterPack();
}
RequiresExprBits.IsSatisfied = true;
for (concepts::Requirement *R : Requirements) {
Dependent |= R->isDependent();
ContainsUnexpandedParameterPack |= R->containsUnexpandedParameterPack();
if (!Dependent) {
RequiresExprBits.IsSatisfied = R->isSatisfied();
if (!RequiresExprBits.IsSatisfied)
break;
}
}
std::copy(LocalParameters.begin(), LocalParameters.end(),
getTrailingObjects<ParmVarDecl *>());
std::copy(Requirements.begin(), Requirements.end(),
getTrailingObjects<concepts::Requirement *>());
RequiresExprBits.IsSatisfied |= Dependent;
// FIXME: move the dependence-computation logic to ComputeDependence.h
if (ContainsUnexpandedParameterPack)
setDependence(getDependence() | ExprDependence::UnexpandedPack);
// FIXME: this is incorrect for cases where we have a non-dependent
// requirement, but its parameters are instantiation-dependent. RequiresExpr
// should be instantiation-dependent if it has instantiation-dependent
// parameters.
if (Dependent)
setDependence(getDependence() | ExprDependence::ValueInstantiation);
}
RequiresExpr::RequiresExpr(ASTContext &C, EmptyShell Empty,
unsigned NumLocalParameters,
unsigned NumRequirements)
: Expr(RequiresExprClass, Empty), NumLocalParameters(NumLocalParameters),
NumRequirements(NumRequirements) { }
RequiresExpr *
RequiresExpr::Create(ASTContext &C, SourceLocation RequiresKWLoc,
RequiresExprBodyDecl *Body,
ArrayRef<ParmVarDecl *> LocalParameters,
ArrayRef<concepts::Requirement *> Requirements,
SourceLocation RBraceLoc) {
void *Mem =
C.Allocate(totalSizeToAlloc<ParmVarDecl *, concepts::Requirement *>(
LocalParameters.size(), Requirements.size()),
alignof(RequiresExpr));
return new (Mem) RequiresExpr(C, RequiresKWLoc, Body, LocalParameters,
Requirements, RBraceLoc);
}
RequiresExpr *
RequiresExpr::Create(ASTContext &C, EmptyShell Empty,
unsigned NumLocalParameters, unsigned NumRequirements) {
void *Mem =
C.Allocate(totalSizeToAlloc<ParmVarDecl *, concepts::Requirement *>(
NumLocalParameters, NumRequirements),
alignof(RequiresExpr));
return new (Mem) RequiresExpr(C, Empty, NumLocalParameters, NumRequirements);
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
index 238507e06335..77554aa2c462 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1,8485 +1,8486 @@
//===-- Clang.cpp - Clang+LLVM ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Clang.h"
#include "AMDGPU.h"
#include "Arch/AArch64.h"
#include "Arch/ARM.h"
#include "Arch/CSKY.h"
#include "Arch/LoongArch.h"
#include "Arch/M68k.h"
#include "Arch/Mips.h"
#include "Arch/PPC.h"
#include "Arch/RISCV.h"
#include "Arch/Sparc.h"
#include "Arch/SystemZ.h"
#include "Arch/VE.h"
#include "Arch/X86.h"
#include "CommonArgs.h"
#include "Hexagon.h"
#include "MSP430.h"
#include "PS4CPU.h"
#include "clang/Basic/CLWarnings.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/HeaderInclude.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MakeSupport.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Distro.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/Types.h"
#include "clang/Driver/XRayArgs.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/ARMTargetParserCommon.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/YAMLParser.h"
#include <cctype>
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
if (Arg *A = Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC,
options::OPT_fminimize_whitespace,
options::OPT_fno_minimize_whitespace)) {
if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) &&
!Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) {
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< A->getBaseArg().getAsString(Args)
<< (D.IsCLMode() ? "/E, /P or /EP" : "-E");
}
}
}
static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) {
// In gcc, only ARM checks this, but it seems reasonable to check universally.
if (Args.hasArg(options::OPT_static))
if (const Arg *A =
Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
<< "-static";
}
// Add backslashes to escape spaces and other backslashes.
// This is used for the space-separated argument list specified with
// the -dwarf-debug-flags option.
static void EscapeSpacesAndBackslashes(const char *Arg,
SmallVectorImpl<char> &Res) {
for (; *Arg; ++Arg) {
switch (*Arg) {
default:
break;
case ' ':
case '\\':
Res.push_back('\\');
break;
}
Res.push_back(*Arg);
}
}
/// Apply \a Work on the current tool chain \a RegularToolChain and any other
/// offloading tool chain that is associated with the current action \a JA.
static void
forAllAssociatedToolChains(Compilation &C, const JobAction &JA,
const ToolChain &RegularToolChain,
llvm::function_ref<void(const ToolChain &)> Work) {
// Apply Work on the current/regular tool chain.
Work(RegularToolChain);
// Apply Work on all the offloading tool chains associated with the current
// action.
if (JA.isHostOffloading(Action::OFK_Cuda))
Work(*C.getSingleOffloadToolChain<Action::OFK_Cuda>());
else if (JA.isDeviceOffloading(Action::OFK_Cuda))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
else if (JA.isHostOffloading(Action::OFK_HIP))
Work(*C.getSingleOffloadToolChain<Action::OFK_HIP>());
else if (JA.isDeviceOffloading(Action::OFK_HIP))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
if (JA.isHostOffloading(Action::OFK_OpenMP)) {
auto TCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
for (auto II = TCs.first, IE = TCs.second; II != IE; ++II)
Work(*II->second);
} else if (JA.isDeviceOffloading(Action::OFK_OpenMP))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
//
// TODO: Add support for other offloading programming models here.
//
}
/// This is a helper function for validating the optional refinement step
/// parameter in reciprocal argument strings. Return false if there is an error
/// parsing the refinement step. Otherwise, return true and set the Position
/// of the refinement step in the input string.
static bool getRefinementStep(StringRef In, const Driver &D,
const Arg &A, size_t &Position) {
const char RefinementStepToken = ':';
Position = In.find(RefinementStepToken);
if (Position != StringRef::npos) {
StringRef Option = A.getOption().getName();
StringRef RefStep = In.substr(Position + 1);
// Allow exactly one numeric character for the additional refinement
// step parameter. This is reasonable for all currently-supported
// operations and architectures because we would expect that a larger value
// of refinement steps would cause the estimate "optimization" to
// under-perform the native operation. Also, if the estimate does not
// converge quickly, it probably will not ever converge, so further
// refinement steps will not produce a better answer.
if (RefStep.size() != 1) {
D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
return false;
}
char RefStepChar = RefStep[0];
if (RefStepChar < '0' || RefStepChar > '9') {
D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
return false;
}
}
return true;
}
/// The -mrecip flag requires processing of many optional parameters.
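/// For example, "-mrecip=vec-sqrtf:2,!divd" enables the vectorized float
/// square-root estimate with two refinement steps and disables the scalar
/// double division estimate.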
static void ParseMRecip(const Driver &D, const ArgList &Args,
ArgStringList &OutStrings) {
StringRef DisabledPrefixIn = "!";
StringRef DisabledPrefixOut = "!";
StringRef EnabledPrefixOut = "";
StringRef Out = "-mrecip=";
Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ);
if (!A)
return;
unsigned NumOptions = A->getNumValues();
if (NumOptions == 0) {
// No option is the same as "all".
OutStrings.push_back(Args.MakeArgString(Out + "all"));
return;
}
// Pass through "all", "none", or "default" with an optional refinement step.
if (NumOptions == 1) {
StringRef Val = A->getValue(0);
size_t RefStepLoc;
if (!getRefinementStep(Val, D, *A, RefStepLoc))
return;
StringRef ValBase = Val.slice(0, RefStepLoc);
if (ValBase == "all" || ValBase == "none" || ValBase == "default") {
OutStrings.push_back(Args.MakeArgString(Out + Val));
return;
}
}
// Each reciprocal type may be enabled or disabled individually.
// Check each input value for validity, concatenate them all back together,
// and pass through.
llvm::StringMap<bool> OptionStrings;
OptionStrings.insert(std::make_pair("divd", false));
OptionStrings.insert(std::make_pair("divf", false));
OptionStrings.insert(std::make_pair("divh", false));
OptionStrings.insert(std::make_pair("vec-divd", false));
OptionStrings.insert(std::make_pair("vec-divf", false));
OptionStrings.insert(std::make_pair("vec-divh", false));
OptionStrings.insert(std::make_pair("sqrtd", false));
OptionStrings.insert(std::make_pair("sqrtf", false));
OptionStrings.insert(std::make_pair("sqrth", false));
OptionStrings.insert(std::make_pair("vec-sqrtd", false));
OptionStrings.insert(std::make_pair("vec-sqrtf", false));
OptionStrings.insert(std::make_pair("vec-sqrth", false));
for (unsigned i = 0; i != NumOptions; ++i) {
StringRef Val = A->getValue(i);
bool IsDisabled = Val.startswith(DisabledPrefixIn);
// Ignore the disablement token for string matching.
if (IsDisabled)
Val = Val.substr(1);
size_t RefStep;
if (!getRefinementStep(Val, D, *A, RefStep))
return;
StringRef ValBase = Val.slice(0, RefStep);
llvm::StringMap<bool>::iterator OptionIter = OptionStrings.find(ValBase);
if (OptionIter == OptionStrings.end()) {
// Try again specifying float suffix.
OptionIter = OptionStrings.find(ValBase.str() + 'f');
if (OptionIter == OptionStrings.end()) {
// The input name did not match any known option string.
D.Diag(diag::err_drv_unknown_argument) << Val;
return;
}
// The option was specified without a half or float or double suffix.
// Make sure that the double or half entry was not already specified.
// The float entry will be checked below.
if (OptionStrings[ValBase.str() + 'd'] ||
OptionStrings[ValBase.str() + 'h']) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
return;
}
}
if (OptionIter->second == true) {
// Duplicate option specified.
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
return;
}
// Mark the matched option as found. Do not allow duplicate specifiers.
OptionIter->second = true;
// If the precision was not specified, also mark the double and half entries
// as found.
if (ValBase.back() != 'f' && ValBase.back() != 'd' && ValBase.back() != 'h') {
OptionStrings[ValBase.str() + 'd'] = true;
OptionStrings[ValBase.str() + 'h'] = true;
}
// Build the output string.
StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut;
Out = Args.MakeArgString(Out + Prefix + Val);
if (i != NumOptions - 1)
Out = Args.MakeArgString(Out + ",");
}
OutStrings.push_back(Args.MakeArgString(Out));
}
/// The -mprefer-vector-width option accepts either a positive integer
/// or the string "none".
static void ParseMPreferVectorWidth(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
Arg *A = Args.getLastArg(options::OPT_mprefer_vector_width_EQ);
if (!A)
return;
StringRef Value = A->getValue();
if (Value == "none") {
CmdArgs.push_back("-mprefer-vector-width=none");
} else {
unsigned Width;
if (Value.getAsInteger(10, Width)) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
CmdArgs.push_back(Args.MakeArgString("-mprefer-vector-width=" + Value));
}
}
static bool
shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime,
const llvm::Triple &Triple) {
// We use the zero-cost exception tables for Objective-C if the non-fragile
// ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and
// later.
if (runtime.isNonFragile())
return true;
if (!Triple.isMacOSX())
return false;
return (!Triple.isMacOSXVersionLT(10, 5) &&
(Triple.getArch() == llvm::Triple::x86_64 ||
Triple.getArch() == llvm::Triple::arm));
}
/// Adds exception-related arguments to the driver command arguments. There's a
/// main flag, -fexceptions, and also language-specific flags to enable/disable
/// C++ and Objective-C exceptions. This makes it possible to, for example,
/// disable C++ exceptions but enable Objective-C exceptions.
static bool addExceptionArgs(const ArgList &Args, types::ID InputType,
const ToolChain &TC, bool KernelOrKext,
const ObjCRuntime &objcRuntime,
ArgStringList &CmdArgs) {
const llvm::Triple &Triple = TC.getTriple();
if (KernelOrKext) {
// -mkernel and -fapple-kext imply no exceptions, so claim exception related
// arguments now to avoid warnings about unused arguments.
Args.ClaimAllArgs(options::OPT_fexceptions);
Args.ClaimAllArgs(options::OPT_fno_exceptions);
Args.ClaimAllArgs(options::OPT_fobjc_exceptions);
Args.ClaimAllArgs(options::OPT_fno_objc_exceptions);
Args.ClaimAllArgs(options::OPT_fcxx_exceptions);
Args.ClaimAllArgs(options::OPT_fno_cxx_exceptions);
Args.ClaimAllArgs(options::OPT_fasync_exceptions);
Args.ClaimAllArgs(options::OPT_fno_async_exceptions);
return false;
}
// See if the user explicitly enabled exceptions.
bool EH = Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
false);
bool EHa = Args.hasFlag(options::OPT_fasync_exceptions,
options::OPT_fno_async_exceptions, false);
if (EHa) {
CmdArgs.push_back("-fasync-exceptions");
EH = true;
}
// Obj-C exceptions are enabled by default, regardless of -fexceptions. This
// is not necessarily sensible, but follows GCC.
if (types::isObjC(InputType) &&
Args.hasFlag(options::OPT_fobjc_exceptions,
options::OPT_fno_objc_exceptions, true)) {
CmdArgs.push_back("-fobjc-exceptions");
EH |= shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple);
}
if (types::isCXX(InputType)) {
// Disable C++ EH by default on XCore and PS4/PS5.
bool CXXExceptionsEnabled = Triple.getArch() != llvm::Triple::xcore &&
!Triple.isPS() && !Triple.isDriverKit();
Arg *ExceptionArg = Args.getLastArg(
options::OPT_fcxx_exceptions, options::OPT_fno_cxx_exceptions,
options::OPT_fexceptions, options::OPT_fno_exceptions);
if (ExceptionArg)
CXXExceptionsEnabled =
ExceptionArg->getOption().matches(options::OPT_fcxx_exceptions) ||
ExceptionArg->getOption().matches(options::OPT_fexceptions);
if (CXXExceptionsEnabled) {
CmdArgs.push_back("-fcxx-exceptions");
EH = true;
}
}
// OPT_fignore_exceptions means exceptions could still be thrown, but no
// cleanup or catch would happen in the current module, so we do not set EH
// to false.
Args.AddLastArg(CmdArgs, options::OPT_fignore_exceptions);
if (EH)
CmdArgs.push_back("-fexceptions");
return EH;
}
static bool ShouldEnableAutolink(const ArgList &Args, const ToolChain &TC,
const JobAction &JA) {
bool Default = true;
if (TC.getTriple().isOSDarwin()) {
// The native darwin assembler doesn't support the linker_option directives,
// so we disable them if we think the .s file will be passed to it.
Default = TC.useIntegratedAs();
}
// The linker_option directives are intended for host compilation.
if (JA.isDeviceOffloading(Action::OFK_Cuda) ||
JA.isDeviceOffloading(Action::OFK_HIP))
Default = false;
return Args.hasFlag(options::OPT_fautolink, options::OPT_fno_autolink,
Default);
}
// Convert an arg of the form "-gN" or "-ggdbN" or one of their aliases
// to the corresponding DebugInfoKind.
static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) {
assert(A.getOption().matches(options::OPT_gN_Group) &&
"Not a -g option that specifies a debug-info level");
if (A.getOption().matches(options::OPT_g0) ||
A.getOption().matches(options::OPT_ggdb0))
return codegenoptions::NoDebugInfo;
if (A.getOption().matches(options::OPT_gline_tables_only) ||
A.getOption().matches(options::OPT_ggdb1))
return codegenoptions::DebugLineTablesOnly;
if (A.getOption().matches(options::OPT_gline_directives_only))
return codegenoptions::DebugDirectivesOnly;
return codegenoptions::DebugInfoConstructor;
}
static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
switch (Triple.getArch()){
default:
return false;
case llvm::Triple::arm:
case llvm::Triple::thumb:
// ARM Darwin targets require a frame pointer to be always present to aid
// offline debugging via backtraces.
return Triple.isOSDarwin();
}
}
static bool useFramePointerForTargetByDefault(const ArgList &Args,
const llvm::Triple &Triple) {
if (Args.hasArg(options::OPT_pg) && !Args.hasArg(options::OPT_mfentry))
return true;
switch (Triple.getArch()) {
case llvm::Triple::xcore:
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
case llvm::Triple::msp430:
// XCore never wants frame pointers, regardless of OS.
// WebAssembly never wants frame pointers.
return false;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
case llvm::Triple::amdgcn:
case llvm::Triple::r600:
case llvm::Triple::csky:
case llvm::Triple::loongarch32:
case llvm::Triple::loongarch64:
return !areOptimizationsEnabled(Args);
default:
break;
}
if (Triple.isOSFuchsia() || Triple.isOSNetBSD()) {
return !areOptimizationsEnabled(Args);
}
if (Triple.isOSLinux() || Triple.getOS() == llvm::Triple::CloudABI ||
Triple.isOSHurd()) {
switch (Triple.getArch()) {
// Don't use a frame pointer on linux if optimizing for certain targets.
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
if (Triple.isAndroid())
return true;
[[fallthrough]];
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::systemz:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return !areOptimizationsEnabled(Args);
default:
return true;
}
}
if (Triple.isOSWindows()) {
switch (Triple.getArch()) {
case llvm::Triple::x86:
return !areOptimizationsEnabled(Args);
case llvm::Triple::x86_64:
return Triple.isOSBinFormatMachO();
case llvm::Triple::arm:
case llvm::Triple::thumb:
// Windows on ARM builds with FPO disabled to aid fast stack walking
return true;
default:
// All other supported Windows ISAs use xdata unwind information, so frame
// pointers are not generally useful.
return false;
}
}
return true;
}
static CodeGenOptions::FramePointerKind
getFramePointerKind(const ArgList &Args, const llvm::Triple &Triple) {
// We have 4 states:
//
// 00) leaf retained, non-leaf retained
// 01) leaf retained, non-leaf omitted (this is invalid)
// 10) leaf omitted, non-leaf retained
// (what -momit-leaf-frame-pointer was designed for)
// 11) leaf omitted, non-leaf omitted
//
// "omit" options taking precedence over "no-omit" options is the only way
// to make 3 valid states representable
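// For example, "-fno-omit-frame-pointer -momit-leaf-frame-pointer" selects
// state 10 above and maps to FramePointerKind::NonLeaf.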
Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
options::OPT_fno_omit_frame_pointer);
bool OmitFP = A && A->getOption().matches(options::OPT_fomit_frame_pointer);
bool NoOmitFP =
A && A->getOption().matches(options::OPT_fno_omit_frame_pointer);
bool OmitLeafFP =
Args.hasFlag(options::OPT_momit_leaf_frame_pointer,
options::OPT_mno_omit_leaf_frame_pointer,
Triple.isAArch64() || Triple.isPS() || Triple.isVE());
if (NoOmitFP || mustUseNonLeafFramePointerForTarget(Triple) ||
(!OmitFP && useFramePointerForTargetByDefault(Args, Triple))) {
if (OmitLeafFP)
return CodeGenOptions::FramePointerKind::NonLeaf;
return CodeGenOptions::FramePointerKind::All;
}
return CodeGenOptions::FramePointerKind::None;
}
/// Add a CC1 option to specify the debug compilation directory.
static const char *addDebugCompDirArg(const ArgList &Args,
ArgStringList &CmdArgs,
const llvm::vfs::FileSystem &VFS) {
if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fdebug_compilation_dir_EQ)) {
if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("-fdebug-compilation-dir=") +
A->getValue()));
else
A->render(Args, CmdArgs);
} else if (llvm::ErrorOr<std::string> CWD =
VFS.getCurrentWorkingDirectory()) {
CmdArgs.push_back(Args.MakeArgString("-fdebug-compilation-dir=" + *CWD));
}
StringRef Path(CmdArgs.back());
return Path.substr(Path.find('=') + 1).data();
}
static void addDebugObjectName(const ArgList &Args, ArgStringList &CmdArgs,
const char *DebugCompilationDir,
const char *OutputFileName) {
// No need to generate a value for -object-file-name if it was provided.
for (auto *Arg : Args.filtered(options::OPT_Xclang))
if (StringRef(Arg->getValue()).startswith("-object-file-name"))
return;
if (Args.hasArg(options::OPT_object_file_name_EQ))
return;
SmallString<128> ObjFileNameForDebug(OutputFileName);
if (ObjFileNameForDebug != "-" &&
!llvm::sys::path::is_absolute(ObjFileNameForDebug) &&
(!DebugCompilationDir ||
llvm::sys::path::is_absolute(DebugCompilationDir))) {
// Make the path absolute in the debug infos like MSVC does.
llvm::sys::fs::make_absolute(ObjFileNameForDebug);
}
CmdArgs.push_back(
Args.MakeArgString(Twine("-object-file-name=") + ObjFileNameForDebug));
}
/// Add a CC1 and CC1AS option to specify the debug file path prefix map.
static void addDebugPrefixMapArg(const Driver &D, const ToolChain &TC,
const ArgList &Args, ArgStringList &CmdArgs) {
auto AddOneArg = [&](StringRef Map, StringRef Name) {
if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option) << Map << Name;
else
CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map));
};
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fdebug_prefix_map_EQ)) {
AddOneArg(A->getValue(), A->getOption().getName());
A->claim();
}
std::string GlobalRemapEntry = TC.GetGlobalDebugPathRemapping();
if (GlobalRemapEntry.empty())
return;
AddOneArg(GlobalRemapEntry, "environment");
}
/// Add a CC1 and CC1AS option to specify the macro file path prefix map.
static void addMacroPrefixMapArg(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fmacro_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
CmdArgs.push_back(Args.MakeArgString("-fmacro-prefix-map=" + Map));
A->claim();
}
}
/// Add a CC1 and CC1AS option to specify the coverage file path prefix map.
static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fcoverage_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
CmdArgs.push_back(Args.MakeArgString("-fcoverage-prefix-map=" + Map));
A->claim();
}
}
/// Vectorize at all optimization levels greater than 1 except for -Oz.
/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
/// enabled.
static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
return true;
if (A->getOption().matches(options::OPT_O0))
return false;
assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
// Vectorize -Os.
StringRef S(A->getValue());
if (S == "s")
return true;
// Don't vectorize -Oz, unless it's the slp vectorizer.
if (S == "z")
return isSlpVec;
unsigned OptLevel = 0;
if (S.getAsInteger(10, OptLevel))
return false;
return OptLevel > 1;
}
return false;
}
/// Add -x lang to \p CmdArgs for \p Input.
static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
ArgStringList &CmdArgs) {
// When using -verify-pch, we don't want to provide the type
// 'precompiled-header' if it was inferred from the file extension
if (Args.hasArg(options::OPT_verify_pch) && Input.getType() == types::TY_PCH)
return;
CmdArgs.push_back("-x");
if (Args.hasArg(options::OPT_rewrite_objc))
CmdArgs.push_back(types::getTypeName(types::TY_PP_ObjCXX));
else {
// Map the driver type to the frontend type. This is mostly an identity
// mapping, except that the distinction between module interface units
// and other source files does not exist at the frontend layer.
const char *ClangType;
switch (Input.getType()) {
case types::TY_CXXModule:
ClangType = "c++";
break;
case types::TY_PP_CXXModule:
ClangType = "c++-cpp-output";
break;
default:
ClangType = types::getTypeName(Input.getType());
break;
}
CmdArgs.push_back(ClangType);
}
}
static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
const Driver &D, const InputInfo &Output,
const ArgList &Args, SanitizerArgs &SanArgs,
ArgStringList &CmdArgs) {
auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate,
options::OPT_fprofile_generate_EQ,
options::OPT_fno_profile_generate);
if (PGOGenerateArg &&
PGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
PGOGenerateArg = nullptr;
auto *CSPGOGenerateArg = Args.getLastArg(options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ,
options::OPT_fno_profile_generate);
if (CSPGOGenerateArg &&
CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
CSPGOGenerateArg = nullptr;
auto *ProfileGenerateArg = Args.getLastArg(
options::OPT_fprofile_instr_generate,
options::OPT_fprofile_instr_generate_EQ,
options::OPT_fno_profile_instr_generate);
if (ProfileGenerateArg &&
ProfileGenerateArg->getOption().matches(
options::OPT_fno_profile_instr_generate))
ProfileGenerateArg = nullptr;
if (PGOGenerateArg && ProfileGenerateArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< PGOGenerateArg->getSpelling() << ProfileGenerateArg->getSpelling();
auto *ProfileUseArg = getLastProfileUseArg(Args);
if (PGOGenerateArg && ProfileUseArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< ProfileUseArg->getSpelling() << PGOGenerateArg->getSpelling();
if (ProfileGenerateArg && ProfileUseArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling();
if (CSPGOGenerateArg && PGOGenerateArg) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< CSPGOGenerateArg->getSpelling() << PGOGenerateArg->getSpelling();
PGOGenerateArg = nullptr;
}
if (TC.getTriple().isOSAIX()) {
if (ProfileGenerateArg)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< ProfileGenerateArg->getSpelling() << TC.getTriple().str();
if (Arg *ProfileSampleUseArg = getLastProfileSampleUseArg(Args))
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< ProfileSampleUseArg->getSpelling() << TC.getTriple().str();
}
if (ProfileGenerateArg) {
if (ProfileGenerateArg->getOption().matches(
options::OPT_fprofile_instr_generate_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-instrument-path=") +
ProfileGenerateArg->getValue()));
// The default is to use Clang Instrumentation.
CmdArgs.push_back("-fprofile-instrument=clang");
if (TC.getTriple().isWindowsMSVCEnvironment()) {
// Add dependent lib for clang_rt.profile
CmdArgs.push_back(Args.MakeArgString(
"--dependent-lib=" + TC.getCompilerRTBasename(Args, "profile")));
}
}
Arg *PGOGenArg = nullptr;
if (PGOGenerateArg) {
assert(!CSPGOGenerateArg);
PGOGenArg = PGOGenerateArg;
CmdArgs.push_back("-fprofile-instrument=llvm");
}
if (CSPGOGenerateArg) {
assert(!PGOGenerateArg);
PGOGenArg = CSPGOGenerateArg;
CmdArgs.push_back("-fprofile-instrument=csllvm");
}
if (PGOGenArg) {
if (TC.getTriple().isWindowsMSVCEnvironment()) {
// Add dependent lib for clang_rt.profile
CmdArgs.push_back(Args.MakeArgString(
"--dependent-lib=" + TC.getCompilerRTBasename(Args, "profile")));
}
if (PGOGenArg->getOption().matches(
PGOGenerateArg ? options::OPT_fprofile_generate_EQ
: options::OPT_fcs_profile_generate_EQ)) {
SmallString<128> Path(PGOGenArg->getValue());
llvm::sys::path::append(Path, "default_%m.profraw");
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-instrument-path=") + Path));
}
}
if (ProfileUseArg) {
if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue()));
else if ((ProfileUseArg->getOption().matches(
options::OPT_fprofile_use_EQ) ||
ProfileUseArg->getOption().matches(
options::OPT_fprofile_instr_use))) {
SmallString<128> Path(
ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue());
if (Path.empty() || llvm::sys::fs::is_directory(Path))
llvm::sys::path::append(Path, "default.profdata");
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path));
}
}
bool EmitCovNotes = Args.hasFlag(options::OPT_ftest_coverage,
options::OPT_fno_test_coverage, false) ||
Args.hasArg(options::OPT_coverage);
bool EmitCovData = TC.needsGCovInstrumentation(Args);
if (EmitCovNotes)
CmdArgs.push_back("-ftest-coverage");
if (EmitCovData)
CmdArgs.push_back("-fprofile-arcs");
if (Args.hasFlag(options::OPT_fcoverage_mapping,
options::OPT_fno_coverage_mapping, false)) {
if (!ProfileGenerateArg)
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fcoverage-mapping"
<< "-fprofile-instr-generate";
CmdArgs.push_back("-fcoverage-mapping");
}
if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fcoverage_compilation_dir_EQ)) {
if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("-fcoverage-compilation-dir=") + A->getValue()));
else
A->render(Args, CmdArgs);
} else if (llvm::ErrorOr<std::string> CWD =
D.getVFS().getCurrentWorkingDirectory()) {
CmdArgs.push_back(Args.MakeArgString("-fcoverage-compilation-dir=" + *CWD));
}
if (Args.hasArg(options::OPT_fprofile_exclude_files_EQ)) {
auto *Arg = Args.getLastArg(options::OPT_fprofile_exclude_files_EQ);
if (!Args.hasArg(options::OPT_coverage))
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fprofile-exclude-files="
<< "--coverage";
StringRef v = Arg->getValue();
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-exclude-files=" + v)));
}
if (Args.hasArg(options::OPT_fprofile_filter_files_EQ)) {
auto *Arg = Args.getLastArg(options::OPT_fprofile_filter_files_EQ);
if (!Args.hasArg(options::OPT_coverage))
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fprofile-filter-files="
<< "--coverage";
StringRef v = Arg->getValue();
CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-filter-files=" + v)));
}
if (const auto *A = Args.getLastArg(options::OPT_fprofile_update_EQ)) {
StringRef Val = A->getValue();
if (Val == "atomic" || Val == "prefer-atomic")
CmdArgs.push_back("-fprofile-update=atomic");
else if (Val != "single")
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
} else if (SanArgs.needsTsanRt()) {
CmdArgs.push_back("-fprofile-update=atomic");
}
int FunctionGroups = 1;
int SelectedFunctionGroup = 0;
if (const auto *A = Args.getLastArg(options::OPT_fprofile_function_groups)) {
StringRef Val = A->getValue();
if (Val.getAsInteger(0, FunctionGroups) || FunctionGroups < 1)
D.Diag(diag::err_drv_invalid_int_value) << A->getAsString(Args) << Val;
}
if (const auto *A =
Args.getLastArg(options::OPT_fprofile_selected_function_group)) {
StringRef Val = A->getValue();
if (Val.getAsInteger(0, SelectedFunctionGroup) ||
SelectedFunctionGroup < 0 || SelectedFunctionGroup >= FunctionGroups)
D.Diag(diag::err_drv_invalid_int_value) << A->getAsString(Args) << Val;
}
if (FunctionGroups != 1)
CmdArgs.push_back(Args.MakeArgString("-fprofile-function-groups=" +
Twine(FunctionGroups)));
if (SelectedFunctionGroup != 0)
CmdArgs.push_back(Args.MakeArgString("-fprofile-selected-function-group=" +
Twine(SelectedFunctionGroup)));
// Leave -fprofile-dir= an unused argument unless .gcda emission is
// enabled. To be polite, with '-fprofile-arcs -fno-profile-arcs' consider
// the flag used. There is no -fno-profile-dir, so the user has no
// targeted way to suppress the warning.
Arg *FProfileDir = nullptr;
if (Args.hasArg(options::OPT_fprofile_arcs) ||
Args.hasArg(options::OPT_coverage))
FProfileDir = Args.getLastArg(options::OPT_fprofile_dir);
// Put the .gcno and .gcda files (if needed) next to the object file or
// bitcode file in the case of LTO.
// FIXME: There should be a simpler way to find the object file for this
// input, and this code probably does the wrong thing for commands that
// compile and link all at once.
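// For example, "clang -c --coverage foo.c -o out/foo.o" emits out/foo.gcno
// at compile time and arranges for out/foo.gcda to be written at run time,
// unless -fprofile-dir= redirects the .gcda file.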
if ((Args.hasArg(options::OPT_c) || Args.hasArg(options::OPT_S)) &&
(EmitCovNotes || EmitCovData) && Output.isFilename()) {
SmallString<128> OutputFilename;
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT__SLASH_Fo))
OutputFilename = FinalOutput->getValue();
else if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
OutputFilename = FinalOutput->getValue();
else
OutputFilename = llvm::sys::path::filename(Output.getBaseInput());
SmallString<128> CoverageFilename = OutputFilename;
if (llvm::sys::path::is_relative(CoverageFilename))
(void)D.getVFS().makeAbsolute(CoverageFilename);
llvm::sys::path::replace_extension(CoverageFilename, "gcno");
CmdArgs.push_back("-coverage-notes-file");
CmdArgs.push_back(Args.MakeArgString(CoverageFilename));
if (EmitCovData) {
if (FProfileDir) {
CoverageFilename = FProfileDir->getValue();
llvm::sys::path::append(CoverageFilename, OutputFilename);
}
llvm::sys::path::replace_extension(CoverageFilename, "gcda");
CmdArgs.push_back("-coverage-data-file");
CmdArgs.push_back(Args.MakeArgString(CoverageFilename));
}
}
}
/// Check whether the given input tree contains any compilation actions.
static bool ContainsCompileAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A))
return true;
return llvm::any_of(A->inputs(), ContainsCompileAction);
}
/// Check if -relax-all should be passed to the internal assembler.
/// This is done by default when compiling non-assembler source with -O0.
static bool UseRelaxAll(Compilation &C, const ArgList &Args) {
bool RelaxDefault = true;
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
RelaxDefault = A->getOption().matches(options::OPT_O0);
if (RelaxDefault) {
RelaxDefault = false;
for (const auto &Act : C.getActions()) {
if (ContainsCompileAction(Act)) {
RelaxDefault = true;
break;
}
}
}
return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all,
RelaxDefault);
}
static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind DebugInfoKind,
unsigned DwarfVersion,
llvm::DebuggerKind DebuggerTuning) {
switch (DebugInfoKind) {
case codegenoptions::DebugDirectivesOnly:
CmdArgs.push_back("-debug-info-kind=line-directives-only");
break;
case codegenoptions::DebugLineTablesOnly:
CmdArgs.push_back("-debug-info-kind=line-tables-only");
break;
case codegenoptions::DebugInfoConstructor:
CmdArgs.push_back("-debug-info-kind=constructor");
break;
case codegenoptions::LimitedDebugInfo:
CmdArgs.push_back("-debug-info-kind=limited");
break;
case codegenoptions::FullDebugInfo:
CmdArgs.push_back("-debug-info-kind=standalone");
break;
case codegenoptions::UnusedTypeInfo:
CmdArgs.push_back("-debug-info-kind=unused-types");
break;
default:
break;
}
if (DwarfVersion > 0)
CmdArgs.push_back(
Args.MakeArgString("-dwarf-version=" + Twine(DwarfVersion)));
switch (DebuggerTuning) {
case llvm::DebuggerKind::GDB:
CmdArgs.push_back("-debugger-tuning=gdb");
break;
case llvm::DebuggerKind::LLDB:
CmdArgs.push_back("-debugger-tuning=lldb");
break;
case llvm::DebuggerKind::SCE:
CmdArgs.push_back("-debugger-tuning=sce");
break;
case llvm::DebuggerKind::DBX:
CmdArgs.push_back("-debugger-tuning=dbx");
break;
default:
break;
}
}
static bool checkDebugInfoOption(const Arg *A, const ArgList &Args,
const Driver &D, const ToolChain &TC) {
assert(A && "Expected non-nullptr argument.");
if (TC.supportsDebugInfoOption(A))
return true;
D.Diag(diag::warn_drv_unsupported_debug_info_opt_for_target)
<< A->getAsString(Args) << TC.getTripleString();
return false;
}
static void RenderDebugInfoCompressionArgs(const ArgList &Args,
ArgStringList &CmdArgs,
const Driver &D,
const ToolChain &TC) {
const Arg *A = Args.getLastArg(options::OPT_gz_EQ);
if (!A)
return;
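// Translate -gz=<format> into --compress-debug-sections=<format>, diagnosing
// formats whose compression library (zlib or zstd) is unavailable in this
// build.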
if (checkDebugInfoOption(A, Args, D, TC)) {
StringRef Value = A->getValue();
if (Value == "none") {
CmdArgs.push_back("--compress-debug-sections=none");
} else if (Value == "zlib") {
if (llvm::compression::zlib::isAvailable()) {
CmdArgs.push_back(
Args.MakeArgString("--compress-debug-sections=" + Twine(Value)));
} else {
D.Diag(diag::warn_debug_compression_unavailable) << "zlib";
}
} else if (Value == "zstd") {
if (llvm::compression::zstd::isAvailable()) {
CmdArgs.push_back(
Args.MakeArgString("--compress-debug-sections=" + Twine(Value)));
} else {
D.Diag(diag::warn_debug_compression_unavailable) << "zstd";
}
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Value;
}
}
}
static void handleAMDGPUCodeObjectVersionOptions(const Driver &D,
const ArgList &Args,
ArgStringList &CmdArgs,
bool IsCC1As = false) {
// If no version was requested by the user, use the default value from the
// back end. This is consistent with the value returned from
// getAMDGPUCodeObjectVersion. This lets clang emit IR for amdgpu without
// requiring the corresponding llvm to have the AMDGPU target enabled,
// provided the user (e.g. front end tests) can use the default.
if (haveAMDGPUCodeObjectVersionArgument(D, Args)) {
unsigned CodeObjVer = getAMDGPUCodeObjectVersion(D, Args);
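// Each insert below lands at index 1 and pushes the earlier inserts back,
// so the final order right after the leading argument is:
// -mcode-object-version=N -mllvm --amdhsa-code-object-version=N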
CmdArgs.insert(CmdArgs.begin() + 1,
Args.MakeArgString(Twine("--amdhsa-code-object-version=") +
Twine(CodeObjVer)));
CmdArgs.insert(CmdArgs.begin() + 1, "-mllvm");
// -cc1as does not accept -mcode-object-version option.
if (!IsCC1As)
CmdArgs.insert(CmdArgs.begin() + 1,
Args.MakeArgString(Twine("-mcode-object-version=") +
Twine(CodeObjVer)));
}
}
void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs,
const InputInfo &Output,
const InputInfoList &Inputs) const {
const bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();
CheckPreprocessingOptions(D, Args);
Args.AddLastArg(CmdArgs, options::OPT_C);
Args.AddLastArg(CmdArgs, options::OPT_CC);
// Handle dependency file generation.
Arg *ArgM = Args.getLastArg(options::OPT_MM);
if (!ArgM)
ArgM = Args.getLastArg(options::OPT_M);
Arg *ArgMD = Args.getLastArg(options::OPT_MMD);
if (!ArgMD)
ArgMD = Args.getLastArg(options::OPT_MD);
// -M and -MM imply -w.
if (ArgM)
CmdArgs.push_back("-w");
else
ArgM = ArgMD;
if (ArgM) {
// Determine the output location.
const char *DepFile;
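// Precedence: an explicit -MF wins; otherwise a dependency-file output
// type uses the output filename; a bare -M/-MM writes to stdout ("-");
// and -MD/-MMD derives the name from the inputs.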
if (Arg *MF = Args.getLastArg(options::OPT_MF)) {
DepFile = MF->getValue();
C.addFailureResultFile(DepFile, &JA);
} else if (Output.getType() == types::TY_Dependencies) {
DepFile = Output.getFilename();
} else if (!ArgMD) {
DepFile = "-";
} else {
DepFile = getDependencyFileName(Args, Inputs);
C.addFailureResultFile(DepFile, &JA);
}
CmdArgs.push_back("-dependency-file");
CmdArgs.push_back(DepFile);
bool HasTarget = false;
for (const Arg *A : Args.filtered(options::OPT_MT, options::OPT_MQ)) {
HasTarget = true;
A->claim();
if (A->getOption().matches(options::OPT_MT)) {
A->render(Args, CmdArgs);
} else {
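// -MQ is lowered to -MT with the target quoted (quoteMakeTarget escapes
// characters that are special to Make); -MT values are rendered verbatim.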
CmdArgs.push_back("-MT");
SmallString<128> Quoted;
quoteMakeTarget(A->getValue(), Quoted);
CmdArgs.push_back(Args.MakeArgString(Quoted));
}
}
// Add a default target if one wasn't specified.
if (!HasTarget) {
const char *DepTarget;
// If the user provided -o, that is the dependency target, except
// when we are only generating a dependency file.
Arg *OutputOpt = Args.getLastArg(options::OPT_o);
if (OutputOpt && Output.getType() != types::TY_Dependencies) {
DepTarget = OutputOpt->getValue();
} else {
// Otherwise derive from the base input.
//
// FIXME: This should use the computed output file location.
SmallString<128> P(Inputs[0].getBaseInput());
llvm::sys::path::replace_extension(P, "o");
DepTarget = Args.MakeArgString(llvm::sys::path::filename(P));
}
CmdArgs.push_back("-MT");
SmallString<128> Quoted;
quoteMakeTarget(DepTarget, Quoted);
CmdArgs.push_back(Args.MakeArgString(Quoted));
}
if (ArgM->getOption().matches(options::OPT_M) ||
ArgM->getOption().matches(options::OPT_MD))
CmdArgs.push_back("-sys-header-deps");
if ((isa<PrecompileJobAction>(JA) &&
!Args.hasArg(options::OPT_fno_module_file_deps)) ||
Args.hasArg(options::OPT_fmodule_file_deps))
CmdArgs.push_back("-module-file-deps");
}
if (Args.hasArg(options::OPT_MG)) {
if (!ArgM || ArgM->getOption().matches(options::OPT_MD) ||
ArgM->getOption().matches(options::OPT_MMD))
D.Diag(diag::err_drv_mg_requires_m_or_mm);
CmdArgs.push_back("-MG");
}
Args.AddLastArg(CmdArgs, options::OPT_MP);
Args.AddLastArg(CmdArgs, options::OPT_MV);
// Add offload include arguments specific for CUDA/HIP. This must happen
// before we -I or -include anything else, because we must pick up the
// CUDA/HIP headers from the particular CUDA/ROCm installation, rather than
// from e.g. /usr/local/include.
if (JA.isOffloading(Action::OFK_Cuda))
getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
if (JA.isOffloading(Action::OFK_HIP))
getToolChain().AddHIPIncludeArgs(Args, CmdArgs);
// If we are offloading to a target via OpenMP we need to include the
// openmp_wrappers folder which contains alternative system headers.
if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
!Args.hasArg(options::OPT_nostdinc) &&
!Args.hasArg(options::OPT_nogpuinc) &&
(getToolChain().getTriple().isNVPTX() ||
getToolChain().getTriple().isAMDGCN())) {
if (!Args.hasArg(options::OPT_nobuiltininc)) {
// Add openmp_wrappers/* to our system include path. This lets us wrap
// standard library headers.
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include");
llvm::sys::path::append(P, "openmp_wrappers");
CmdArgs.push_back("-internal-isystem");
CmdArgs.push_back(Args.MakeArgString(P));
}
CmdArgs.push_back("-include");
CmdArgs.push_back("__clang_openmp_device_functions.h");
}
// Add -i* options, and automatically translate to
// -include-pch/-include-pth for transparent PCH support. It's
// wonky, but we also look for .gch files so we can slot seamlessly
// into a build system already set up to generate them.
if (getToolChain().getDriver().IsCLMode()) {
const Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
const Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
if (YcArg && JA.getKind() >= Action::PrecompileJobClass &&
JA.getKind() <= Action::AssembleJobClass) {
CmdArgs.push_back(Args.MakeArgString("-building-pch-with-obj"));
// -fpch-instantiate-templates is the default when creating
// a precompiled header using /Yc.
if (Args.hasFlag(options::OPT_fpch_instantiate_templates,
options::OPT_fno_pch_instantiate_templates, true))
CmdArgs.push_back(Args.MakeArgString("-fpch-instantiate-templates"));
}
if (YcArg || YuArg) {
StringRef ThroughHeader = YcArg ? YcArg->getValue() : YuArg->getValue();
if (!isa<PrecompileJobAction>(JA)) {
CmdArgs.push_back("-include-pch");
CmdArgs.push_back(Args.MakeArgString(D.GetClPchPath(
C, !ThroughHeader.empty()
? ThroughHeader
: llvm::sys::path::filename(Inputs[0].getBaseInput()))));
}
if (ThroughHeader.empty()) {
CmdArgs.push_back(Args.MakeArgString(
Twine("-pch-through-hdrstop-") + (YcArg ? "create" : "use")));
} else {
CmdArgs.push_back(
Args.MakeArgString(Twine("-pch-through-header=") + ThroughHeader));
}
}
}
bool RenderedImplicitInclude = false;
for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) {
if (A->getOption().matches(options::OPT_include) &&
D.getProbePrecompiled()) {
// Handling of gcc-style gch precompiled headers.
bool IsFirstImplicitInclude = !RenderedImplicitInclude;
RenderedImplicitInclude = true;
bool FoundPCH = false;
SmallString<128> P(A->getValue());
// We want the files to have a name like foo.h.pch. Add a dummy extension
// so that replace_extension does the right thing.
P += ".dummy";
llvm::sys::path::replace_extension(P, "pch");
if (D.getVFS().exists(P))
FoundPCH = true;
if (!FoundPCH) {
llvm::sys::path::replace_extension(P, "gch");
if (D.getVFS().exists(P)) {
FoundPCH = true;
}
}
if (FoundPCH) {
if (IsFirstImplicitInclude) {
A->claim();
CmdArgs.push_back("-include-pch");
CmdArgs.push_back(Args.MakeArgString(P));
continue;
} else {
// Ignore the PCH if it is not first on the command line, and emit a warning.
D.Diag(diag::warn_drv_pch_not_first_include) << P
<< A->getAsString(Args);
}
}
} else if (A->getOption().matches(options::OPT_isystem_after)) {
// Handling of paths which must come late. These entries are handled by
// the toolchain itself after the resource dir is inserted in the right
// search order.
// Do not claim the argument; that way, on toolchains which do not
// honour the option, its use triggers an unused-argument warning
// instead of going unnoticed.
continue;
} else if (A->getOption().matches(options::OPT_stdlibxx_isystem)) {
// Translated to -internal-isystem by the driver, no need to pass to cc1.
continue;
}
// Not translated, render as usual.
A->claim();
A->render(Args, CmdArgs);
}
Args.AddAllArgs(CmdArgs,
{options::OPT_D, options::OPT_U, options::OPT_I_Group,
options::OPT_F, options::OPT_index_header_map});
// Add -Wp, and -Xpreprocessor if using the preprocessor.
// FIXME: There is a very unfortunate problem here: some troubled
// souls abuse -Wp, to pass preprocessor options in gcc syntax. To
// really support that we would have to parse and then translate
// those options. :(
Args.AddAllArgValues(CmdArgs, options::OPT_Wp_COMMA,
options::OPT_Xpreprocessor);
// -I- is a deprecated GCC feature, reject it.
if (Arg *A = Args.getLastArg(options::OPT_I_))
D.Diag(diag::err_drv_I_dash_not_supported) << A->getAsString(Args);
// If we have a --sysroot, and don't have an explicit -isysroot flag, add an
// -isysroot to the CC1 invocation.
StringRef sysroot = C.getSysRoot();
if (sysroot != "") {
if (!Args.hasArg(options::OPT_isysroot)) {
CmdArgs.push_back("-isysroot");
CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
}
}
// Parse additional include paths from environment variables.
// FIXME: We should probably sink the logic for handling these from the
// frontend into the driver. It will allow deleting 4 otherwise unused flags.
// CPATH - included following the user-specified includes (but prior to
// builtin and standard includes).
addDirectoryList(Args, CmdArgs, "-I", "CPATH");
// C_INCLUDE_PATH - system includes enabled when compiling C.
addDirectoryList(Args, CmdArgs, "-c-isystem", "C_INCLUDE_PATH");
// CPLUS_INCLUDE_PATH - system includes enabled when compiling C++.
addDirectoryList(Args, CmdArgs, "-cxx-isystem", "CPLUS_INCLUDE_PATH");
// OBJC_INCLUDE_PATH - system includes enabled when compiling ObjC.
addDirectoryList(Args, CmdArgs, "-objc-isystem", "OBJC_INCLUDE_PATH");
// OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++.
addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH");
// While adding the include arguments, we also attempt to retrieve the
// arguments of related offloading toolchains or arguments that are specific
// of an offloading programming model.
// Add C++ include arguments, if needed.
if (types::isCXX(Inputs[0].getType())) {
bool HasStdlibxxIsystem = Args.hasArg(options::OPT_stdlibxx_isystem);
forAllAssociatedToolChains(
C, JA, getToolChain(),
[&Args, &CmdArgs, HasStdlibxxIsystem](const ToolChain &TC) {
HasStdlibxxIsystem ? TC.AddClangCXXStdlibIsystemArgs(Args, CmdArgs)
: TC.AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
});
}
// Add system include arguments for all targets but IAMCU.
if (!IsIAMCU)
forAllAssociatedToolChains(C, JA, getToolChain(),
[&Args, &CmdArgs](const ToolChain &TC) {
TC.AddClangSystemIncludeArgs(Args, CmdArgs);
});
else {
// For IAMCU add special include arguments.
getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs);
}
addMacroPrefixMapArg(D, Args, CmdArgs);
addCoveragePrefixMapArg(D, Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_ffile_reproducible,
options::OPT_fno_file_reproducible);
if (const char *Epoch = std::getenv("SOURCE_DATE_EPOCH")) {
CmdArgs.push_back("-source-date-epoch");
CmdArgs.push_back(Args.MakeArgString(Epoch));
}
}
// FIXME: Move to target hook.
static bool isSignedCharDefault(const llvm::Triple &Triple) {
switch (Triple.getArch()) {
default:
return true;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
if (Triple.isOSDarwin() || Triple.isOSWindows())
return true;
return false;
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
if (Triple.isOSDarwin())
return true;
return false;
case llvm::Triple::hexagon:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64le:
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
case llvm::Triple::systemz:
case llvm::Triple::xcore:
return false;
}
}
static bool hasMultipleInvocations(const llvm::Triple &Triple,
const ArgList &Args) {
// Supported only on Darwin where we invoke the compiler multiple times
// followed by an invocation to lipo.
if (!Triple.isOSDarwin())
return false;
// If more than one "-arch <arch>" is specified, we're targeting multiple
// architectures resulting in a fat binary.
return Args.getAllArgValues(options::OPT_arch).size() > 1;
}
static bool checkRemarksOptions(const Driver &D, const ArgList &Args,
const llvm::Triple &Triple) {
// When enabling remarks, we need to error if:
// * The remark file is specified but we're targeting multiple architectures,
// which means more than one remark file is being generated.
bool hasMultipleInvocations = ::hasMultipleInvocations(Triple, Args);
bool hasExplicitOutputFile =
Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (hasMultipleInvocations && hasExplicitOutputFile) {
D.Diag(diag::err_drv_invalid_output_with_multiple_archs)
<< "-foptimization-record-file";
return false;
}
return true;
}
static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input,
const InputInfo &Output, const JobAction &JA) {
StringRef Format = "yaml";
if (const Arg *A = Args.getLastArg(options::OPT_fsave_optimization_record_EQ))
Format = A->getValue();
CmdArgs.push_back("-opt-record-file");
const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (A) {
CmdArgs.push_back(A->getValue());
} else {
bool hasMultipleArchs =
Triple.isOSDarwin() && // Only supported on Darwin platforms.
Args.getAllArgValues(options::OPT_arch).size() > 1;
SmallString<128> F;
if (Args.hasArg(options::OPT_c) || Args.hasArg(options::OPT_S)) {
if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
F = FinalOutput->getValue();
} else {
if (Format != "yaml" && // For YAML, keep the original behavior.
Triple.isOSDarwin() && // Enable this only on Darwin, since it's the only platform supporting .dSYM bundles.
Output.isFilename())
F = Output.getFilename();
}
if (F.empty()) {
// Use the input filename.
F = llvm::sys::path::stem(Input.getBaseInput());
// If we're compiling for an offload architecture (i.e. a CUDA device),
// we need to make the file name for the device compilation different
// from the host compilation.
if (!JA.isDeviceOffloading(Action::OFK_None) &&
!JA.isDeviceOffloading(Action::OFK_Host)) {
llvm::sys::path::replace_extension(F, "");
F += Action::GetOffloadingFileNamePrefix(JA.getOffloadingDeviceKind(),
Triple.normalize());
F += "-";
F += JA.getOffloadingArch();
}
}
// If we have more than one "-arch", we should name the files
// differently so that every cc1 invocation writes to a different file.
// We do that by appending "-<arch>", with "<arch>" being the arch
// name from the triple.
if (hasMultipleArchs) {
// First, remember the extension.
SmallString<64> OldExtension = llvm::sys::path::extension(F);
// Then, remove it.
llvm::sys::path::replace_extension(F, "");
// Attach -<arch> to it.
F += "-";
F += Triple.getArchName();
// Put back the extension.
llvm::sys::path::replace_extension(F, OldExtension);
}
SmallString<32> Extension;
Extension += "opt.";
Extension += Format;
llvm::sys::path::replace_extension(F, Extension);
CmdArgs.push_back(Args.MakeArgString(F));
}
if (const Arg *A =
Args.getLastArg(options::OPT_foptimization_record_passes_EQ)) {
CmdArgs.push_back("-opt-record-passes");
CmdArgs.push_back(A->getValue());
}
if (!Format.empty()) {
CmdArgs.push_back("-opt-record-format");
CmdArgs.push_back(Format.data());
}
}
void AddAAPCSVolatileBitfieldArgs(const ArgList &Args, ArgStringList &CmdArgs) {
if (!Args.hasFlag(options::OPT_faapcs_bitfield_width,
options::OPT_fno_aapcs_bitfield_width, true))
CmdArgs.push_back("-fno-aapcs-bitfield-width");
if (Args.getLastArg(options::OPT_ForceAAPCSBitfieldLoad))
CmdArgs.push_back("-faapcs-bitfield-load");
}
namespace {
void RenderARMABI(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args, ArgStringList &CmdArgs) {
// Select the ABI to use.
// FIXME: Support -meabi.
// FIXME: Parts of this are duplicated in the backend, unify this somehow.
const char *ABIName = nullptr;
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
ABIName = A->getValue();
} else {
std::string CPU = getCPUName(D, Args, Triple, /*FromAs*/ false);
ABIName = llvm::ARM::computeDefaultTargetABI(Triple, CPU).data();
}
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
void AddUnalignedAccessWarning(ArgStringList &CmdArgs) {
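// Inspect the target features already rendered into CmdArgs; if the most
// recently added strict-align feature is "+strict-align", enable the
// -Wunaligned-access warning.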
auto StrictAlignIter =
llvm::find_if(llvm::reverse(CmdArgs), [](StringRef Arg) {
return Arg == "+strict-align" || Arg == "-strict-align";
});
if (StrictAlignIter != CmdArgs.rend() &&
StringRef(*StrictAlignIter) == "+strict-align")
CmdArgs.push_back("-Wunaligned-access");
}
}
static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs, bool isAArch64) {
const Arg *A = isAArch64
? Args.getLastArg(options::OPT_msign_return_address_EQ,
options::OPT_mbranch_protection_EQ)
: Args.getLastArg(options::OPT_mbranch_protection_EQ);
if (!A)
return;
const Driver &D = TC.getDriver();
const llvm::Triple &Triple = TC.getEffectiveTriple();
if (!(isAArch64 || (Triple.isArmT32() && Triple.isArmMClass())))
D.Diag(diag::warn_incompatible_branch_protection_option)
<< Triple.getArchName();
StringRef Scope, Key;
bool IndirectBranches;
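// -msign-return-address=<scope> selects return-address signing with the
// A key and no branch-target enforcement; -mbranch-protection=<spec> is
// parsed for scope, key (it can also select the B key), and BTI.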
if (A->getOption().matches(options::OPT_msign_return_address_EQ)) {
Scope = A->getValue();
if (Scope != "none" && Scope != "non-leaf" && Scope != "all")
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Scope;
Key = "a_key";
IndirectBranches = false;
} else {
StringRef DiagMsg;
llvm::ARM::ParsedBranchProtection PBP;
if (!llvm::ARM::parseBranchProtection(A->getValue(), PBP, DiagMsg))
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << DiagMsg;
if (!isAArch64 && PBP.Key == "b_key")
D.Diag(diag::warn_unsupported_branch_protection)
<< "b-key" << A->getAsString(Args);
Scope = PBP.Scope;
Key = PBP.Key;
IndirectBranches = PBP.BranchTargetEnforcement;
}
CmdArgs.push_back(
Args.MakeArgString(Twine("-msign-return-address=") + Scope));
if (!Scope.equals("none"))
CmdArgs.push_back(
Args.MakeArgString(Twine("-msign-return-address-key=") + Key));
if (IndirectBranches)
CmdArgs.push_back("-mbranch-target-enforce");
}
void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs, bool KernelOrKext) const {
RenderARMABI(getToolChain().getDriver(), Triple, Args, CmdArgs);
// Determine floating point ABI from the options & target defaults.
arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args);
if (ABI == arm::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
// FIXME: This changes CPP defines, we need -target-soft-float.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else if (ABI == arm::FloatABI::SoftFP) {
// Floating point operations are hard, but argument passing is soft.
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(ABI == arm::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
// Forward the -mglobal-merge option for explicit control over the pass.
if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge,
options::OPT_mno_global_merge)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_mno_global_merge))
CmdArgs.push_back("-arm-global-merge=false");
else
CmdArgs.push_back("-arm-global-merge=true");
}
if (!Args.hasFlag(options::OPT_mimplicit_float,
options::OPT_mno_implicit_float, true))
CmdArgs.push_back("-no-implicit-float");
if (Args.getLastArg(options::OPT_mcmse))
CmdArgs.push_back("-mcmse");
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
// Enable/disable return address signing and indirect branch targets.
CollectARMPACBTIOptions(getToolChain(), Args, CmdArgs, false /*isAArch64*/);
AddUnalignedAccessWarning(CmdArgs);
}
void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
const ArgList &Args, bool KernelOrKext,
ArgStringList &CmdArgs) const {
const ToolChain &TC = getToolChain();
// Add the target features
getTargetFeatures(TC.getDriver(), EffectiveTriple, Args, CmdArgs, false);
// Add target specific flags.
switch (TC.getArch()) {
default:
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// Use the effective triple, which takes into account the deployment target.
AddARMTargetArgs(EffectiveTriple, Args, CmdArgs, KernelOrKext);
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
AddAArch64TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::loongarch32:
case llvm::Triple::loongarch64:
AddLoongArchTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
AddMIPSTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
AddPPCTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
AddRISCVTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
AddSparcTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::systemz:
AddSystemZTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
AddX86TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::lanai:
AddLanaiTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::hexagon:
AddHexagonTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
AddWebAssemblyTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::ve:
AddVETargetArgs(Args, CmdArgs);
break;
}
}
namespace {
void RenderAArch64ABI(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs) {
const char *ABIName = nullptr;
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
ABIName = A->getValue();
else if (Triple.isOSDarwin())
ABIName = "darwinpcs";
else
ABIName = "aapcs";
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
}
void Clang::AddAArch64TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) ||
Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext))
CmdArgs.push_back("-disable-red-zone");
if (!Args.hasFlag(options::OPT_mimplicit_float,
options::OPT_mno_implicit_float, true))
CmdArgs.push_back("-no-implicit-float");
RenderAArch64ABI(Triple, Args, CmdArgs);
// Forward the -mglobal-merge option for explicit control over the pass.
if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge,
options::OPT_mno_global_merge)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_mno_global_merge))
CmdArgs.push_back("-aarch64-enable-global-merge=false");
else
CmdArgs.push_back("-aarch64-enable-global-merge=true");
}
// Enable/disable return address signing and indirect branch targets.
CollectARMPACBTIOptions(getToolChain(), Args, CmdArgs, true /*isAArch64*/);
// Handle -msve-vector-bits=<bits>.
if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
StringRef Val = A->getValue();
const Driver &D = getToolChain().getDriver();
if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
Val.equals("1024") || Val.equals("2048") || Val.equals("128+") ||
Val.equals("256+") || Val.equals("512+") || Val.equals("1024+") ||
Val.equals("2048+")) {
unsigned Bits = 0;
if (Val.endswith("+"))
Val = Val.substr(0, Val.size() - 1);
else {
bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid;
assert(!Invalid && "Failed to parse value");
CmdArgs.push_back(
Args.MakeArgString("-mvscale-max=" + llvm::Twine(Bits / 128)));
}
bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid;
assert(!Invalid && "Failed to parse value");
CmdArgs.push_back(
Args.MakeArgString("-mvscale-min=" + llvm::Twine(Bits / 128)));
// Silently drop requests for vector-length agnostic code as it's implied.
} else if (!Val.equals("scalable"))
// Handle the unsupported values passed to msve-vector-bits.
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
}
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
CmdArgs.push_back("-tune-cpu");
if (strcmp(A->getValue(), "native") == 0)
CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName()));
else
CmdArgs.push_back(A->getValue());
}
AddUnalignedAccessWarning(CmdArgs);
}
void Clang::AddLoongArchTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(loongarch::getLoongArchABI(getToolChain().getDriver(), Args,
getToolChain().getTriple())
.data());
}
void Clang::AddMIPSTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
StringRef CPUName;
StringRef ABIName;
const llvm::Triple &Triple = getToolChain().getTriple();
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
mips::FloatABI ABI = mips::getMipsFloatABI(D, Args, Triple);
if (ABI == mips::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(ABI == mips::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
if (Arg *A = Args.getLastArg(options::OPT_mldc1_sdc1,
options::OPT_mno_ldc1_sdc1)) {
if (A->getOption().matches(options::OPT_mno_ldc1_sdc1)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mno-ldc1-sdc1");
}
}
if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division,
options::OPT_mno_check_zero_division)) {
if (A->getOption().matches(options::OPT_mno_check_zero_division)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mno-check-zero-division");
}
}
if (Args.getLastArg(options::OPT_mfix4300)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mfix4300");
}
if (Arg *A = Args.getLastArg(options::OPT_G)) {
StringRef v = A->getValue();
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v));
A->claim();
}
Arg *GPOpt = Args.getLastArg(options::OPT_mgpopt, options::OPT_mno_gpopt);
Arg *ABICalls =
Args.getLastArg(options::OPT_mabicalls, options::OPT_mno_abicalls);
// -mabicalls is the default for many MIPS environments, even with -fno-pic.
// -mgpopt is the default for static, -fno-pic environments but these two
// options conflict. We want to be certain that -mno-abicalls -mgpopt is
// the only case where -mllvm -mgpopt is passed.
// NOTE: We need a warning here or in the backend to warn when -mgpopt is
// passed explicitly when compiling something with -mabicalls
// (implicitly) in effect. Currently the warning is in the backend.
//
// When the ABI in use is N64, we also need to determine the PIC mode that
// is in use, as -fno-pic for N64 implies -mno-abicalls.
bool NoABICalls =
ABICalls && ABICalls->getOption().matches(options::OPT_mno_abicalls);
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) =
ParsePICArgs(getToolChain(), Args);
NoABICalls = NoABICalls ||
(RelocationModel == llvm::Reloc::Static && ABIName == "n64");
bool WantGPOpt = GPOpt && GPOpt->getOption().matches(options::OPT_mgpopt);
// We quietly ignore -mno-gpopt as the backend defaults to -mno-gpopt.
if (NoABICalls && (!GPOpt || WantGPOpt)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mgpopt");
Arg *LocalSData = Args.getLastArg(options::OPT_mlocal_sdata,
options::OPT_mno_local_sdata);
Arg *ExternSData = Args.getLastArg(options::OPT_mextern_sdata,
options::OPT_mno_extern_sdata);
Arg *EmbeddedData = Args.getLastArg(options::OPT_membedded_data,
options::OPT_mno_embedded_data);
if (LocalSData) {
CmdArgs.push_back("-mllvm");
if (LocalSData->getOption().matches(options::OPT_mlocal_sdata)) {
CmdArgs.push_back("-mlocal-sdata=1");
} else {
CmdArgs.push_back("-mlocal-sdata=0");
}
LocalSData->claim();
}
if (ExternSData) {
CmdArgs.push_back("-mllvm");
if (ExternSData->getOption().matches(options::OPT_mextern_sdata)) {
CmdArgs.push_back("-mextern-sdata=1");
} else {
CmdArgs.push_back("-mextern-sdata=0");
}
ExternSData->claim();
}
if (EmbeddedData) {
CmdArgs.push_back("-mllvm");
if (EmbeddedData->getOption().matches(options::OPT_membedded_data)) {
CmdArgs.push_back("-membedded-data=1");
} else {
CmdArgs.push_back("-membedded-data=0");
}
EmbeddedData->claim();
}
} else if ((!ABICalls || (!NoABICalls && ABICalls)) && WantGPOpt)
D.Diag(diag::warn_drv_unsupported_gpopt) << (ABICalls ? 0 : 1);
if (GPOpt)
GPOpt->claim();
if (Arg *A = Args.getLastArg(options::OPT_mcompact_branches_EQ)) {
StringRef Val = StringRef(A->getValue());
if (mips::hasCompactBranches(CPUName)) {
if (Val == "never" || Val == "always" || Val == "optimal") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-mips-compact-branches=" + Val));
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
} else
D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName;
}
if (Arg *A = Args.getLastArg(options::OPT_mrelax_pic_calls,
options::OPT_mno_relax_pic_calls)) {
if (A->getOption().matches(options::OPT_mno_relax_pic_calls)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mips-jalr-reloc=0");
}
}
}
void Clang::AddPPCTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &T = getToolChain().getTriple();
if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) {
CmdArgs.push_back("-tune-cpu");
std::string CPU = ppc::getPPCTuneCPU(Args, T);
CmdArgs.push_back(Args.MakeArgString(CPU));
}
// Select the ABI to use.
const char *ABIName = nullptr;
if (T.isOSBinFormatELF()) {
switch (getToolChain().getArch()) {
case llvm::Triple::ppc64: {
if (T.isPPC64ELFv2ABI())
ABIName = "elfv2";
else
ABIName = "elfv1";
break;
}
case llvm::Triple::ppc64le:
ABIName = "elfv2";
break;
default:
break;
}
}
bool IEEELongDouble = getToolChain().defaultToIEEELongDouble();
for (const Arg *A : Args.filtered(options::OPT_mabi_EQ)) {
StringRef V = A->getValue();
if (V == "ieeelongdouble")
IEEELongDouble = true;
else if (V == "ibmlongdouble")
IEEELongDouble = false;
else if (V != "altivec")
// The ppc64 Linux ABIs are all "altivec" ABIs by default. Accept and ignore
// the option if given, as we don't have backend support for any targets
// that don't use the AltiVec ABI.
ABIName = A->getValue();
}
if (IEEELongDouble)
CmdArgs.push_back("-mabi=ieeelongdouble");
ppc::FloatABI FloatABI =
ppc::getPPCFloatABI(getToolChain().getDriver(), Args);
if (FloatABI == ppc::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(FloatABI == ppc::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
if (ABIName) {
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
}
static void SetRISCVSmallDataLimit(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
const Driver &D = TC.getDriver();
const llvm::Triple &Triple = TC.getTriple();
// The default small-data limit is eight bytes.
const char *SmallDataLimit = "8";
// Determine the small-data limit.
if (Args.getLastArg(options::OPT_shared, options::OPT_fpic,
options::OPT_fPIC)) {
// Linker relaxation is not supported for PIC, so disable small data.
SmallDataLimit = "0";
if (Args.hasArg(options::OPT_G)) {
D.Diag(diag::warn_drv_unsupported_sdata);
}
} else if (Args.getLastArgValue(options::OPT_mcmodel_EQ)
.equals_insensitive("large") &&
(Triple.getArch() == llvm::Triple::riscv64)) {
// Linker relaxation is not supported for RV64 with the large code model,
// so disable small data.
SmallDataLimit = "0";
if (Args.hasArg(options::OPT_G)) {
D.Diag(diag::warn_drv_unsupported_sdata);
}
} else if (Arg *A = Args.getLastArg(options::OPT_G)) {
SmallDataLimit = A->getValue();
}
// Forward the -msmall-data-limit= option.
CmdArgs.push_back("-msmall-data-limit");
CmdArgs.push_back(SmallDataLimit);
}
void Clang::AddRISCVTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getTriple();
StringRef ABIName = riscv::getRISCVABI(Args, Triple);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
SetRISCVSmallDataLimit(getToolChain(), Args, CmdArgs);
if (!Args.hasFlag(options::OPT_mimplicit_float,
options::OPT_mno_implicit_float, true))
CmdArgs.push_back("-no-implicit-float");
if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) {
CmdArgs.push_back("-tune-cpu");
if (strcmp(A->getValue(), "native") == 0)
CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName()));
else
CmdArgs.push_back(A->getValue());
}
}
void Clang::AddSparcTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
sparc::FloatABI FloatABI =
sparc::getSparcFloatABI(getToolChain().getDriver(), Args);
if (FloatABI == sparc::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(FloatABI == sparc::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
StringRef Name = A->getValue();
std::string TuneCPU;
if (Name == "native")
TuneCPU = std::string(llvm::sys::getHostCPUName());
else
TuneCPU = std::string(Name);
CmdArgs.push_back("-tune-cpu");
CmdArgs.push_back(Args.MakeArgString(TuneCPU));
}
}
void Clang::AddSystemZTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) {
CmdArgs.push_back("-tune-cpu");
if (strcmp(A->getValue(), "native") == 0)
CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName()));
else
CmdArgs.push_back(A->getValue());
}
bool HasBackchain =
Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false);
bool HasPackedStack = Args.hasFlag(options::OPT_mpacked_stack,
options::OPT_mno_packed_stack, false);
systemz::FloatABI FloatABI =
systemz::getSystemZFloatABI(getToolChain().getDriver(), Args);
bool HasSoftFloat = (FloatABI == systemz::FloatABI::Soft);
if (HasBackchain && HasPackedStack && !HasSoftFloat) {
const Driver &D = getToolChain().getDriver();
D.Diag(diag::err_drv_unsupported_opt)
<< "-mpacked-stack -mbackchain -mhard-float";
}
if (HasBackchain)
CmdArgs.push_back("-mbackchain");
if (HasPackedStack)
CmdArgs.push_back("-mpacked-stack");
if (HasSoftFloat) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
}
}
void Clang::AddX86TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
addX86AlignBranchArgs(D, Args, CmdArgs, /*IsLTO=*/false);
if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) ||
Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext))
CmdArgs.push_back("-disable-red-zone");
if (!Args.hasFlag(options::OPT_mtls_direct_seg_refs,
options::OPT_mno_tls_direct_seg_refs, true))
CmdArgs.push_back("-mno-tls-direct-seg-refs");
// Default to avoid implicit floating-point for kernel/kext code, but allow
// that to be overridden with -mno-soft-float.
bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext));
if (Arg *A = Args.getLastArg(
options::OPT_msoft_float, options::OPT_mno_soft_float,
options::OPT_mimplicit_float, options::OPT_mno_implicit_float)) {
const Option &O = A->getOption();
NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) ||
O.matches(options::OPT_msoft_float));
}
if (NoImplicitFloat)
CmdArgs.push_back("-no-implicit-float");
if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) {
StringRef Value = A->getValue();
if (Value == "intel" || Value == "att") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value));
CmdArgs.push_back(Args.MakeArgString("-inline-asm=" + Value));
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Value;
}
} else if (D.IsCLMode()) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-x86-asm-syntax=intel");
}
if (Arg *A = Args.getLastArg(options::OPT_mskip_rax_setup,
options::OPT_mno_skip_rax_setup))
if (A->getOption().matches(options::OPT_mskip_rax_setup))
CmdArgs.push_back(Args.MakeArgString("-mskip-rax-setup"));
// Set flags to support MCU ABI.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
CmdArgs.push_back("-mstack-alignment=4");
}
// Handle -mtune.
// Default to "generic" unless -march is present or targeting the PS4/PS5.
std::string TuneCPU;
if (!Args.hasArg(clang::driver::options::OPT_march_EQ) &&
!getToolChain().getTriple().isPS())
TuneCPU = "generic";
// Override based on -mtune.
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
StringRef Name = A->getValue();
if (Name == "native") {
Name = llvm::sys::getHostCPUName();
if (!Name.empty())
TuneCPU = std::string(Name);
} else
TuneCPU = std::string(Name);
}
if (!TuneCPU.empty()) {
CmdArgs.push_back("-tune-cpu");
CmdArgs.push_back(Args.MakeArgString(TuneCPU));
}
}
void Clang::AddHexagonTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-mqdsp6-compat");
CmdArgs.push_back("-Wreturn-type");
if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(
Args.MakeArgString("-hexagon-small-data-threshold=" + Twine(*G)));
}
if (!Args.hasArg(options::OPT_fno_short_enums))
CmdArgs.push_back("-fshort-enums");
if (Args.getLastArg(options::OPT_mieee_rnd_near)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-hexagon-ieee-rnd-near");
}
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-machine-sink-split=0");
}
void Clang::AddLanaiTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPUName = A->getValue();
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPUName));
}
if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
StringRef Value = A->getValue();
// Only mregparm=4 is supported, for compatibility with old usage; report an
// error for all other values. Note that getAsInteger() returns true on
// failure, so values that fail to parse are diagnosed as well.
int Mregparm;
if (Value.getAsInteger(10, Mregparm) || Mregparm != 4) {
getToolChain().getDriver().Diag(
diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Value;
}
}
}
void Clang::AddWebAssemblyTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Default to "hidden" visibility.
if (!Args.hasArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat))
CmdArgs.push_back("-fvisibility=hidden");
}
void Clang::AddVETargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const {
// Floating point operations and argument passing are hard.
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
void Clang::DumpCompilationDatabase(Compilation &C, StringRef Filename,
StringRef Target, const InputInfo &Output,
const InputInfo &Input, const ArgList &Args) const {
// If this is a dry run, do not create the compilation database file.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
return;
using llvm::yaml::escape;
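// Note: each entry is written as "{...}," with no enclosing array brackets,
// so the output is a compilation-database fragment rather than a complete
// JSON document.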
const Driver &D = getToolChain().getDriver();
if (!CompilationDatabase) {
std::error_code EC;
auto File = std::make_unique<llvm::raw_fd_ostream>(
Filename, EC,
llvm::sys::fs::OF_TextWithCRLF | llvm::sys::fs::OF_Append);
if (EC) {
D.Diag(clang::diag::err_drv_compilationdatabase) << Filename
<< EC.message();
return;
}
CompilationDatabase = std::move(File);
}
auto &CDB = *CompilationDatabase;
auto CWD = D.getVFS().getCurrentWorkingDirectory();
if (!CWD)
CWD = ".";
CDB << "{ \"directory\": \"" << escape(*CWD) << "\"";
CDB << ", \"file\": \"" << escape(Input.getFilename()) << "\"";
CDB << ", \"output\": \"" << escape(Output.getFilename()) << "\"";
CDB << ", \"arguments\": [\"" << escape(D.ClangExecutable) << "\"";
SmallString<128> Buf;
Buf = "-x";
Buf += types::getTypeName(Input.getType());
CDB << ", \"" << escape(Buf) << "\"";
if (!D.SysRoot.empty() && !Args.hasArg(options::OPT__sysroot_EQ)) {
Buf = "--sysroot=";
Buf += D.SysRoot;
CDB << ", \"" << escape(Buf) << "\"";
}
CDB << ", \"" << escape(Input.getFilename()) << "\"";
CDB << ", \"-o\", \"" << escape(Output.getFilename()) << "\"";
for (auto &A: Args) {
auto &O = A->getOption();
// Skip language selection, which is positional.
if (O.getID() == options::OPT_x)
continue;
// Skip writing dependency output and the compilation database itself.
if (O.getGroup().isValid() && O.getGroup().getID() == options::OPT_M_Group)
continue;
if (O.getID() == options::OPT_gen_cdb_fragment_path)
continue;
// Skip inputs.
if (O.getKind() == Option::InputClass)
continue;
// Skip output.
if (O.getID() == options::OPT_o)
continue;
// All other arguments are quoted and appended.
ArgStringList ASL;
A->render(Args, ASL);
for (auto &it: ASL)
CDB << ", \"" << escape(it) << "\"";
}
Buf = "--target=";
Buf += Target;
CDB << ", \"" << escape(Buf) << "\"]},\n";
}
void Clang::DumpCompilationDatabaseFragmentToDir(
StringRef Dir, Compilation &C, StringRef Target, const InputInfo &Output,
const InputInfo &Input, const llvm::opt::ArgList &Args) const {
// If this is a dry run, do not create the compilation database file.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
return;
if (CompilationDatabase)
DumpCompilationDatabase(C, "", Target, Output, Input, Args);
SmallString<256> Path = Dir;
const auto &Driver = C.getDriver();
Driver.getVFS().makeAbsolute(Path);
auto Err = llvm::sys::fs::create_directory(Path, /*IgnoreExisting=*/true);
if (Err) {
Driver.Diag(diag::err_drv_compilationdatabase) << Dir << Err.message();
return;
}
llvm::sys::path::append(
Path,
Twine(llvm::sys::path::filename(Input.getFilename())) + ".%%%%.json");
int FD;
SmallString<256> TempPath;
Err = llvm::sys::fs::createUniqueFile(Path, FD, TempPath,
llvm::sys::fs::OF_Text);
if (Err) {
Driver.Diag(diag::err_drv_compilationdatabase) << Path << Err.message();
return;
}
CompilationDatabase =
std::make_unique<llvm::raw_fd_ostream>(FD, /*shouldClose=*/true);
DumpCompilationDatabase(C, "", Target, Output, Input, Args);
}
static bool CheckARMImplicitITArg(StringRef Value) {
return Value == "always" || Value == "never" || Value == "arm" ||
Value == "thumb";
}
static void AddARMImplicitITArgs(const ArgList &Args, ArgStringList &CmdArgs,
StringRef Value) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-arm-implicit-it=" + Value));
}
static void CollectArgsForIntegratedAssembler(Compilation &C,
const ArgList &Args,
ArgStringList &CmdArgs,
const Driver &D) {
if (UseRelaxAll(C, Args))
CmdArgs.push_back("-mrelax-all");
// Only default to -mincremental-linker-compatible if we think we are
// targeting the MSVC linker.
bool DefaultIncrementalLinkerCompatible =
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
if (Args.hasFlag(options::OPT_mincremental_linker_compatible,
options::OPT_mno_incremental_linker_compatible,
DefaultIncrementalLinkerCompatible))
CmdArgs.push_back("-mincremental-linker-compatible");
Args.AddLastArg(CmdArgs, options::OPT_femit_dwarf_unwind_EQ);
// If you add more args here, also add them to the block below that
// starts with "// If CollectArgsForIntegratedAssembler() isn't called below".
// When passing -I arguments to the assembler we sometimes need to
// unconditionally take the next argument. For example, when parsing
// '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the
// -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo'
// arg after parsing the '-I' arg.
bool TakeNextArg = false;
bool UseRelaxRelocations = C.getDefaultToolChain().useRelaxRelocations();
bool UseNoExecStack = false;
const char *MipsTargetFeature = nullptr;
StringRef ImplicitIt;
for (const Arg *A :
Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler,
options::OPT_mimplicit_it_EQ)) {
A->claim();
if (A->getOption().getID() == options::OPT_mimplicit_it_EQ) {
switch (C.getDefaultToolChain().getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// Only store the value; the last value set takes effect.
ImplicitIt = A->getValue();
if (!CheckARMImplicitITArg(ImplicitIt))
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << ImplicitIt;
continue;
default:
break;
}
}
for (StringRef Value : A->getValues()) {
if (TakeNextArg) {
CmdArgs.push_back(Value.data());
TakeNextArg = false;
continue;
}
if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() &&
Value == "-mbig-obj")
continue; // LLVM handles bigobj automatically
switch (C.getDefaultToolChain().getArch()) {
default:
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
if (Value == "--no-type-check") {
CmdArgs.push_back("-mno-type-check");
continue;
}
break;
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::arm:
case llvm::Triple::armeb:
if (Value.startswith("-mimplicit-it=")) {
// Only store the value; the last value set takes effect.
ImplicitIt = Value.split("=").second;
if (CheckARMImplicitITArg(ImplicitIt))
continue;
}
if (Value == "-mthumb")
// -mthumb has already been processed in ComputeLLVMTriple();
// recognize it here but skip over it.
continue;
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
if (Value == "--trap") {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+use-tcc-in-div");
continue;
}
if (Value == "--break") {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-use-tcc-in-div");
continue;
}
if (Value.startswith("-msoft-float")) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+soft-float");
continue;
}
if (Value.startswith("-mhard-float")) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-soft-float");
continue;
}
MipsTargetFeature = llvm::StringSwitch<const char *>(Value)
.Case("-mips1", "+mips1")
.Case("-mips2", "+mips2")
.Case("-mips3", "+mips3")
.Case("-mips4", "+mips4")
.Case("-mips5", "+mips5")
.Case("-mips32", "+mips32")
.Case("-mips32r2", "+mips32r2")
.Case("-mips32r3", "+mips32r3")
.Case("-mips32r5", "+mips32r5")
.Case("-mips32r6", "+mips32r6")
.Case("-mips64", "+mips64")
.Case("-mips64r2", "+mips64r2")
.Case("-mips64r3", "+mips64r3")
.Case("-mips64r5", "+mips64r5")
.Case("-mips64r6", "+mips64r6")
.Default(nullptr);
if (MipsTargetFeature)
continue;
}
if (Value == "-force_cpusubtype_ALL") {
// Do nothing, this is the default and we don't support anything else.
} else if (Value == "-L") {
CmdArgs.push_back("-msave-temp-labels");
} else if (Value == "--fatal-warnings") {
CmdArgs.push_back("-massembler-fatal-warnings");
} else if (Value == "--no-warn" || Value == "-W") {
CmdArgs.push_back("-massembler-no-warn");
} else if (Value == "--noexecstack") {
UseNoExecStack = true;
} else if (Value.startswith("-compress-debug-sections") ||
Value.startswith("--compress-debug-sections") ||
Value == "-nocompress-debug-sections" ||
Value == "--nocompress-debug-sections") {
CmdArgs.push_back(Value.data());
} else if (Value == "-mrelax-relocations=yes" ||
Value == "--mrelax-relocations=yes") {
UseRelaxRelocations = true;
} else if (Value == "-mrelax-relocations=no" ||
Value == "--mrelax-relocations=no") {
UseRelaxRelocations = false;
} else if (Value.startswith("-I")) {
CmdArgs.push_back(Value.data());
// We need to consume the next argument if the current arg is a plain
// -I. The next arg will be the include directory.
if (Value == "-I")
TakeNextArg = true;
} else if (Value.startswith("-gdwarf-")) {
// "-gdwarf-N" options are not cc1as options.
unsigned DwarfVersion = DwarfVersionNum(Value);
if (DwarfVersion == 0) { // Send it onward, and let cc1as complain.
CmdArgs.push_back(Value.data());
} else {
RenderDebugEnablingArgs(Args, CmdArgs,
codegenoptions::DebugInfoConstructor,
DwarfVersion, llvm::DebuggerKind::Default);
}
} else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") ||
Value.startswith("-mhwdiv") || Value.startswith("-march")) {
// Do nothing, we'll validate it later.
} else if (Value == "-defsym") {
if (A->getNumValues() != 2) {
D.Diag(diag::err_drv_defsym_invalid_format) << Value;
break;
}
const char *S = A->getValue(1);
auto Pair = StringRef(S).split('=');
auto Sym = Pair.first;
auto SVal = Pair.second;
if (Sym.empty() || SVal.empty()) {
D.Diag(diag::err_drv_defsym_invalid_format) << S;
break;
}
int64_t IVal;
if (SVal.getAsInteger(0, IVal)) {
D.Diag(diag::err_drv_defsym_invalid_symval) << SVal;
break;
}
CmdArgs.push_back(Value.data());
TakeNextArg = true;
} else if (Value == "-fdebug-compilation-dir") {
CmdArgs.push_back("-fdebug-compilation-dir");
TakeNextArg = true;
} else if (Value.consume_front("-fdebug-compilation-dir=")) {
// The flag is a -Wa / -Xassembler argument and Options doesn't
// parse the argument, so this isn't automatically aliased to
// -fdebug-compilation-dir (without '=') here.
CmdArgs.push_back("-fdebug-compilation-dir");
CmdArgs.push_back(Value.data());
} else if (Value == "--version") {
D.PrintVersion(C, llvm::outs());
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Value;
}
}
}
if (ImplicitIt.size())
AddARMImplicitITArgs(Args, CmdArgs, ImplicitIt);
if (!UseRelaxRelocations)
CmdArgs.push_back("-mrelax-relocations=no");
if (UseNoExecStack)
CmdArgs.push_back("-mnoexecstack");
if (MipsTargetFeature != nullptr) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back(MipsTargetFeature);
}
// Forward -fembed-bitcode to the assembler.
if (C.getDriver().embedBitcodeEnabled() ||
C.getDriver().embedBitcodeMarkerOnly())
Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
if (const char *AsSecureLogFile = getenv("AS_SECURE_LOG_FILE")) {
CmdArgs.push_back("-as-secure-log-file");
CmdArgs.push_back(Args.MakeArgString(AsSecureLogFile));
}
}
static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
bool OFastEnabled, const ArgList &Args,
ArgStringList &CmdArgs,
const JobAction &JA) {
// Handle various floating point optimization flags, mapping them to the
// appropriate LLVM code generation flags. This is complicated by several
// "umbrella" flags, so we do this by stepping through the flags incrementally
// adjusting what we think is enabled/disabled, then at the end setting the
// LLVM flags based on the final state.
bool HonorINFs = true;
bool HonorNaNs = true;
bool ApproxFunc = false;
// -fmath-errno is the default on some platforms, e.g. BSD-derived OSes.
bool MathErrno = TC.IsMathErrnoDefault();
bool AssociativeMath = false;
bool ReciprocalMath = false;
bool SignedZeros = true;
bool TrappingMath = false; // Implemented via -ffp-exception-behavior
bool TrappingMathPresent = false; // Is trapping-math in args, and not
// overridden by ffp-exception-behavior?
bool RoundingFPMath = false;
bool RoundingMathPresent = false; // Is rounding-math in args?
// -ffp-model values: strict, fast, precise
StringRef FPModel = "";
// -ffp-exception-behavior options: strict, maytrap, ignore
StringRef FPExceptionBehavior = "";
// -ffp-eval-method options: double, extended, source
StringRef FPEvalMethod = "";
const llvm::DenormalMode DefaultDenormalFPMath =
TC.getDefaultDenormalModeForType(Args, JA);
const llvm::DenormalMode DefaultDenormalFP32Math =
TC.getDefaultDenormalModeForType(Args, JA, &llvm::APFloat::IEEEsingle());
llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
// CUDA and HIP don't rely on the frontend to pass an ffp-contract option.
// If one wasn't given by the user, don't pass it here.
StringRef FPContract;
StringRef LastSeenFfpContractOption;
bool SeenUnsafeMathModeOption = false;
if (!JA.isDeviceOffloading(Action::OFK_Cuda) &&
!JA.isOffloading(Action::OFK_HIP))
FPContract = "on";
bool StrictFPModel = false;
StringRef Float16ExcessPrecision = "";
if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
CmdArgs.push_back("-mlimit-float-precision");
CmdArgs.push_back(A->getValue());
}
for (const Arg *A : Args) {
auto optID = A->getOption().getID();
bool PreciseFPModel = false;
switch (optID) {
default:
break;
case options::OPT_ffp_model_EQ: {
// If -ffp-model= is seen, reset to -fno-fast-math.
HonorINFs = true;
HonorNaNs = true;
ApproxFunc = false;
// Turning *off* -ffast-math restores the toolchain default.
MathErrno = TC.IsMathErrnoDefault();
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
// -fno-fast-math restores the default denormal and fp-contract handling.
FPContract = "on";
DenormalFPMath = llvm::DenormalMode::getIEEE();
// FIXME: The target may have picked a non-IEEE default mode here based on
// -cl-denorms-are-zero. Should the target consider -fp-model interaction?
DenormalFP32Math = llvm::DenormalMode::getIEEE();
StringRef Val = A->getValue();
if (OFastEnabled && !Val.equals("fast")) {
// Only -ffp-model=fast is compatible with OFast, ignore.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-model=" + Val)
<< "-Ofast";
break;
}
StrictFPModel = false;
PreciseFPModel = true;
// -ffp-model= is a driver option; it is entirely rewritten into more
// granular options before being passed into cc1.
// Use the corresponding gcc option in the switch below.
if (!FPModel.empty() && !FPModel.equals(Val))
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-model=" + FPModel)
<< Args.MakeArgString("-ffp-model=" + Val);
if (Val.equals("fast")) {
optID = options::OPT_ffast_math;
FPModel = Val;
FPContract = "fast";
} else if (Val.equals("precise")) {
optID = options::OPT_ffp_contract;
FPModel = Val;
FPContract = "on";
PreciseFPModel = true;
} else if (Val.equals("strict")) {
StrictFPModel = true;
optID = options::OPT_frounding_math;
FPExceptionBehavior = "strict";
FPModel = Val;
FPContract = "off";
TrappingMath = true;
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
break;
}
}
switch (optID) {
// If this isn't an FP option, skip the claim below.
default: continue;
// Options controlling individual features
case options::OPT_fhonor_infinities: HonorINFs = true; break;
case options::OPT_fno_honor_infinities: HonorINFs = false; break;
case options::OPT_fhonor_nans: HonorNaNs = true; break;
case options::OPT_fno_honor_nans: HonorNaNs = false; break;
case options::OPT_fapprox_func: ApproxFunc = true; break;
case options::OPT_fno_approx_func: ApproxFunc = false; break;
case options::OPT_fmath_errno: MathErrno = true; break;
case options::OPT_fno_math_errno: MathErrno = false; break;
case options::OPT_fassociative_math: AssociativeMath = true; break;
case options::OPT_fno_associative_math: AssociativeMath = false; break;
case options::OPT_freciprocal_math: ReciprocalMath = true; break;
case options::OPT_fno_reciprocal_math: ReciprocalMath = false; break;
case options::OPT_fsigned_zeros: SignedZeros = true; break;
case options::OPT_fno_signed_zeros: SignedZeros = false; break;
case options::OPT_ftrapping_math:
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals("strict"))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< "-ftrapping-math";
TrappingMath = true;
TrappingMathPresent = true;
FPExceptionBehavior = "strict";
break;
case options::OPT_fno_trapping_math:
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals("ignore"))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< "-fno-trapping-math";
TrappingMath = false;
TrappingMathPresent = true;
FPExceptionBehavior = "ignore";
break;
case options::OPT_frounding_math:
RoundingFPMath = true;
RoundingMathPresent = true;
break;
case options::OPT_fno_rounding_math:
RoundingFPMath = false;
RoundingMathPresent = false;
break;
case options::OPT_fdenormal_fp_math_EQ:
DenormalFPMath = llvm::parseDenormalFPAttribute(A->getValue());
DenormalFP32Math = DenormalFPMath;
if (!DenormalFPMath.isValid()) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
break;
case options::OPT_fdenormal_fp_math_f32_EQ:
DenormalFP32Math = llvm::parseDenormalFPAttribute(A->getValue());
if (!DenormalFP32Math.isValid()) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
break;
// Validate and pass through -ffp-contract option.
case options::OPT_ffp_contract: {
StringRef Val = A->getValue();
if (PreciseFPModel) {
// -ffp-model=precise implies -ffp-contract=on; FPContract was already set
// when the -ffp-model option was handled above, so there is nothing more
// to do here.
;
} else if (Val.equals("fast") || Val.equals("on") || Val.equals("off")) {
FPContract = Val;
LastSeenFfpContractOption = Val;
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
break;
}
// Validate and pass through -ffp-model option.
case options::OPT_ffp_model_EQ:
// This should only occur in the error case
// since the optID has been replaced by a more granular
// floating point option.
break;
// Validate and pass through -ffp-exception-behavior option.
case options::OPT_ffp_exception_behavior_EQ: {
StringRef Val = A->getValue();
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals(Val))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< Args.MakeArgString("-ffp-exception-behavior=" + Val);
TrappingMath = TrappingMathPresent = false;
if (Val.equals("ignore") || Val.equals("maytrap"))
FPExceptionBehavior = Val;
else if (Val.equals("strict")) {
FPExceptionBehavior = Val;
TrappingMath = TrappingMathPresent = true;
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
break;
}
// Validate and pass through -ffp-eval-method option.
case options::OPT_ffp_eval_method_EQ: {
StringRef Val = A->getValue();
if (Val.equals("double") || Val.equals("extended") ||
Val.equals("source"))
FPEvalMethod = Val;
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
break;
}
case options::OPT_fexcess_precision_EQ: {
StringRef Val = A->getValue();
const llvm::Triple::ArchType Arch = TC.getArch();
if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
if (Val.equals("standard") || Val.equals("fast"))
Float16ExcessPrecision = Val;
// For GCC compatibility, also accept the value "16", which means
// "disable excess precision" and has the same meaning as clang's
// equivalent value "none".
else if (Val.equals("16"))
Float16ExcessPrecision = "none";
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
} else {
if (!(Val.equals("standard") || Val.equals("fast")))
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
}
break;
}
case options::OPT_ffinite_math_only:
HonorINFs = false;
HonorNaNs = false;
break;
case options::OPT_fno_finite_math_only:
HonorINFs = true;
HonorNaNs = true;
break;
case options::OPT_funsafe_math_optimizations:
AssociativeMath = true;
ReciprocalMath = true;
SignedZeros = false;
ApproxFunc = true;
TrappingMath = false;
FPExceptionBehavior = "";
FPContract = "fast";
SeenUnsafeMathModeOption = true;
break;
case options::OPT_fno_unsafe_math_optimizations:
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
ApproxFunc = false;
TrappingMath = true;
FPExceptionBehavior = "strict";
// The target may have opted to flush by default, so force IEEE.
DenormalFPMath = llvm::DenormalMode::getIEEE();
DenormalFP32Math = llvm::DenormalMode::getIEEE();
if (!JA.isDeviceOffloading(Action::OFK_Cuda) &&
!JA.isOffloading(Action::OFK_HIP)) {
if (LastSeenFfpContractOption != "") {
FPContract = LastSeenFfpContractOption;
} else if (SeenUnsafeMathModeOption)
FPContract = "on";
}
break;
case options::OPT_Ofast:
// If -Ofast is the optimization level, then -ffast-math should be enabled
if (!OFastEnabled)
continue;
[[fallthrough]];
case options::OPT_ffast_math:
HonorINFs = false;
HonorNaNs = false;
MathErrno = false;
AssociativeMath = true;
ReciprocalMath = true;
ApproxFunc = true;
SignedZeros = false;
TrappingMath = false;
RoundingFPMath = false;
FPExceptionBehavior = "";
// If fast-math is set then set the fp-contract mode to fast.
FPContract = "fast";
SeenUnsafeMathModeOption = true;
break;
case options::OPT_fno_fast_math:
HonorINFs = true;
HonorNaNs = true;
// Turning on -ffast-math (with either flag) removes the need for
// MathErrno. However, turning *off* -ffast-math merely restores the
// toolchain default (which may be false).
MathErrno = TC.IsMathErrnoDefault();
AssociativeMath = false;
ReciprocalMath = false;
ApproxFunc = false;
SignedZeros = true;
// -fno-fast-math restores the default denormal and fp-contract handling
DenormalFPMath = DefaultDenormalFPMath;
DenormalFP32Math = llvm::DenormalMode::getIEEE();
if (!JA.isDeviceOffloading(Action::OFK_Cuda) &&
!JA.isOffloading(Action::OFK_HIP)) {
if (LastSeenFfpContractOption != "") {
FPContract = LastSeenFfpContractOption;
} else if (SeenUnsafeMathModeOption)
FPContract = "on";
}
break;
}
if (StrictFPModel) {
// If -ffp-model=strict has been specified on the command line but a
// subsequent option conflicts with it, emit a warning diagnostic.
if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath &&
SignedZeros && TrappingMath && RoundingFPMath && !ApproxFunc &&
DenormalFPMath == llvm::DenormalMode::getIEEE() &&
DenormalFP32Math == llvm::DenormalMode::getIEEE() &&
FPContract.equals("off"))
// OK: Current Arg doesn't conflict with -ffp-model=strict
;
else {
StrictFPModel = false;
FPModel = "";
auto RHS = (A->getNumValues() == 0)
? A->getSpelling()
: Args.MakeArgString(A->getSpelling() + A->getValue());
if (RHS != "-ffp-model=strict")
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< "-ffp-model=strict" << RHS;
}
}
// If we handled this option, claim it
A->claim();
}
if (!HonorINFs)
CmdArgs.push_back("-menable-no-infs");
if (!HonorNaNs)
CmdArgs.push_back("-menable-no-nans");
if (ApproxFunc)
CmdArgs.push_back("-fapprox-func");
if (MathErrno)
CmdArgs.push_back("-fmath-errno");
if (AssociativeMath && ReciprocalMath && !SignedZeros && ApproxFunc &&
!TrappingMath)
CmdArgs.push_back("-funsafe-math-optimizations");
if (!SignedZeros)
CmdArgs.push_back("-fno-signed-zeros");
if (AssociativeMath && !SignedZeros && !TrappingMath)
CmdArgs.push_back("-mreassociate");
if (ReciprocalMath)
CmdArgs.push_back("-freciprocal-math");
if (TrappingMath) {
// FP Exception Behavior is also set to strict
assert(FPExceptionBehavior.equals("strict"));
}
// The default is IEEE.
if (DenormalFPMath != llvm::DenormalMode::getIEEE()) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math=" << DenormalFPMath;
CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}
// Add f32 specific denormal mode flag if it's different.
if (DenormalFP32Math != DenormalFPMath) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math;
CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}
if (!FPContract.empty())
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract));
if (!RoundingFPMath)
CmdArgs.push_back(Args.MakeArgString("-fno-rounding-math"));
if (RoundingFPMath && RoundingMathPresent)
CmdArgs.push_back(Args.MakeArgString("-frounding-math"));
if (!FPExceptionBehavior.empty())
CmdArgs.push_back(Args.MakeArgString("-ffp-exception-behavior=" +
FPExceptionBehavior));
if (!FPEvalMethod.empty())
CmdArgs.push_back(Args.MakeArgString("-ffp-eval-method=" + FPEvalMethod));
if (!Float16ExcessPrecision.empty())
CmdArgs.push_back(Args.MakeArgString("-ffloat16-excess-precision=" +
Float16ExcessPrecision));
ParseMRecip(D, Args, CmdArgs);
// -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for the
// individual features enabled by -ffast-math instead of the option itself as
// that's consistent with gcc's behaviour.
if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath && ApproxFunc &&
ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
CmdArgs.push_back("-ffast-math");
if (FPModel.equals("fast")) {
if (FPContract.equals("fast"))
// All set, do nothing.
;
else if (FPContract.empty())
// Enable -ffp-contract=fast
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
else
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< "-ffp-model=fast"
<< Args.MakeArgString("-ffp-contract=" + FPContract);
}
}
// Handle __FINITE_MATH_ONLY__ similarly.
if (!HonorINFs && !HonorNaNs)
CmdArgs.push_back("-ffinite-math-only");
if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) {
CmdArgs.push_back("-mfpmath");
CmdArgs.push_back(A->getValue());
}
// Disable a codegen optimization for floating-point casts.
if (Args.hasFlag(options::OPT_fno_strict_float_cast_overflow,
options::OPT_fstrict_float_cast_overflow, false))
CmdArgs.push_back("-fno-strict-float-cast-overflow");
}
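// Illustrative summary of the FP rendering above (assuming a host compile
// whose toolchain default denormal mode is IEEE): a plain "-ffast-math" on
// the driver line is lowered to roughly "-menable-no-infs -menable-no-nans
// -fapprox-func -funsafe-math-optimizations -fno-signed-zeros -mreassociate
// -freciprocal-math -ffp-contract=fast -fno-rounding-math -ffast-math
// -ffinite-math-only" on the cc1 line.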
static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input) {
// Add default argument set.
if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) {
CmdArgs.push_back("-analyzer-checker=core");
CmdArgs.push_back("-analyzer-checker=apiModeling");
if (!Triple.isWindowsMSVCEnvironment()) {
CmdArgs.push_back("-analyzer-checker=unix");
} else {
// Enable "unix" checkers that also work on Windows.
CmdArgs.push_back("-analyzer-checker=unix.API");
CmdArgs.push_back("-analyzer-checker=unix.Malloc");
CmdArgs.push_back("-analyzer-checker=unix.MallocSizeof");
CmdArgs.push_back("-analyzer-checker=unix.MismatchedDeallocator");
CmdArgs.push_back("-analyzer-checker=unix.cstring.BadSizeArg");
CmdArgs.push_back("-analyzer-checker=unix.cstring.NullArg");
}
// Disable some unix checkers for PS4/PS5.
if (Triple.isPS()) {
CmdArgs.push_back("-analyzer-disable-checker=unix.API");
CmdArgs.push_back("-analyzer-disable-checker=unix.Vfork");
}
if (Triple.isOSDarwin()) {
CmdArgs.push_back("-analyzer-checker=osx");
CmdArgs.push_back(
"-analyzer-checker=security.insecureAPI.decodeValueOfObjCType");
}
else if (Triple.isOSFuchsia())
CmdArgs.push_back("-analyzer-checker=fuchsia");
CmdArgs.push_back("-analyzer-checker=deadcode");
if (types::isCXX(Input.getType()))
CmdArgs.push_back("-analyzer-checker=cplusplus");
if (!Triple.isPS()) {
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.UncheckedReturn");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork");
}
// Default nullability checks.
CmdArgs.push_back("-analyzer-checker=nullability.NullPassedToNonnull");
CmdArgs.push_back("-analyzer-checker=nullability.NullReturnedFromNonnull");
}
// Set the output format. The default is plist, for (lame) historical reasons.
CmdArgs.push_back("-analyzer-output");
if (Arg *A = Args.getLastArg(options::OPT__analyzer_output))
CmdArgs.push_back(A->getValue());
else
CmdArgs.push_back("plist");
// Disable the presentation of standard compiler warnings when using
// --analyze. We only want to show static analyzer diagnostics or frontend
// errors.
CmdArgs.push_back("-w");
// Add -Xanalyzer arguments when running as analyzer.
Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer);
}
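// Illustrative example for RenderAnalyzerOptions above (assuming a C input on
// a non-Windows, non-PS, non-Darwin, non-Fuchsia target and no --analyzer-*
// overrides): the defaults expand to the core, apiModeling, unix, deadcode,
// security.insecureAPI.* and nullability checkers, plus "-analyzer-output
// plist" and "-w".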
static bool isValidSymbolName(StringRef S) {
if (S.empty())
return false;
if (std::isdigit(S[0]))
return false;
return llvm::all_of(S, [](char C) { return std::isalnum(C) || C == '_'; });
}
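// isValidSymbolName accepts, for example, "__stack_chk_guard" and "guard0",
// and rejects names that start with a digit or contain characters outside
// [A-Za-z0-9_].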
static void RenderSSPOptions(const Driver &D, const ToolChain &TC,
const ArgList &Args, ArgStringList &CmdArgs,
bool KernelOrKext) {
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
// NVPTX doesn't support stack protectors; from the compiler's perspective, it
// doesn't even have a stack!
if (EffectiveTriple.isNVPTX())
return;
// -stack-protector=0 is default.
LangOptions::StackProtectorMode StackProtectorLevel = LangOptions::SSPOff;
LangOptions::StackProtectorMode DefaultStackProtectorLevel =
TC.GetDefaultStackProtectorLevel(KernelOrKext);
if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
options::OPT_fstack_protector_all,
options::OPT_fstack_protector_strong,
options::OPT_fstack_protector)) {
if (A->getOption().matches(options::OPT_fstack_protector))
StackProtectorLevel =
std::max<>(LangOptions::SSPOn, DefaultStackProtectorLevel);
else if (A->getOption().matches(options::OPT_fstack_protector_strong))
StackProtectorLevel = LangOptions::SSPStrong;
else if (A->getOption().matches(options::OPT_fstack_protector_all))
StackProtectorLevel = LangOptions::SSPReq;
if (EffectiveTriple.isBPF() && StackProtectorLevel != LangOptions::SSPOff) {
D.Diag(diag::warn_drv_unsupported_option_for_target)
<< A->getSpelling() << EffectiveTriple.getTriple();
StackProtectorLevel = DefaultStackProtectorLevel;
}
} else {
StackProtectorLevel = DefaultStackProtectorLevel;
}
if (StackProtectorLevel) {
CmdArgs.push_back("-stack-protector");
CmdArgs.push_back(Args.MakeArgString(Twine(StackProtectorLevel)));
}
// --param ssp-buffer-size=
for (const Arg *A : Args.filtered(options::OPT__param)) {
StringRef Str(A->getValue());
if (Str.startswith("ssp-buffer-size=")) {
if (StackProtectorLevel) {
CmdArgs.push_back("-stack-protector-buffer-size");
// FIXME: Verify the argument is a valid integer.
CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16)));
}
A->claim();
}
}
const std::string &TripleStr = EffectiveTriple.getTriple();
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64() &&
!EffectiveTriple.isARM() && !EffectiveTriple.isThumb())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
if ((EffectiveTriple.isX86() || EffectiveTriple.isARM() ||
EffectiveTriple.isThumb()) &&
Value != "tls" && Value != "global") {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "tls global";
return;
}
if ((EffectiveTriple.isARM() || EffectiveTriple.isThumb()) &&
Value == "tls") {
if (!Args.hasArg(options::OPT_mstack_protector_guard_offset_EQ)) {
D.Diag(diag::err_drv_ssp_missing_offset_argument)
<< A->getAsString(Args);
return;
}
// Check whether the target subarch supports the hardware TLS register
if (!arm::isHardTPSupported(EffectiveTriple)) {
D.Diag(diag::err_target_unsupported_tp_hard)
<< EffectiveTriple.getArchName();
return;
}
// Check whether the user asked for something other than -mtp=cp15
if (Arg *A = Args.getLastArg(options::OPT_mtp_mode_EQ)) {
StringRef Value = A->getValue();
if (Value != "cp15") {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-mstack-protector-guard=tls";
return;
}
}
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+read-tp-hard");
}
if (EffectiveTriple.isAArch64() && Value != "sysreg" && Value != "global") {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "sysreg global";
return;
}
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_offset_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64() &&
!EffectiveTriple.isARM() && !EffectiveTriple.isThumb())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
int Offset;
if (Value.getAsInteger(10, Offset)) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
if ((EffectiveTriple.isARM() || EffectiveTriple.isThumb()) &&
(Offset < 0 || Offset > 0xfffff)) {
D.Diag(diag::err_drv_invalid_int_value)
<< A->getOption().getName() << Value;
return;
}
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_reg_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
if (EffectiveTriple.isX86() && (Value != "fs" && Value != "gs")) {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "fs gs";
return;
}
if (EffectiveTriple.isAArch64() && Value != "sp_el0") {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_symbol_EQ)) {
StringRef Value = A->getValue();
if (!isValidSymbolName(Value)) {
D.Diag(diag::err_drv_argument_only_allowed_with)
<< A->getOption().getName() << "legal symbol name";
return;
}
A->render(Args, CmdArgs);
}
}
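// Example for RenderSSPOptions above (illustrative): "-fstack-protector-strong"
// renders as "-stack-protector <N>", where <N> is the numeric value of
// LangOptions::SSPStrong, and "--param ssp-buffer-size=8" additionally adds
// "-stack-protector-buffer-size 8".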
static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
if (!EffectiveTriple.isOSFreeBSD() && !EffectiveTriple.isOSLinux())
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
!EffectiveTriple.isPPC64())
return;
Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection,
options::OPT_fno_stack_clash_protection);
}
static void RenderTrivialAutoVarInitOptions(const Driver &D,
const ToolChain &TC,
const ArgList &Args,
ArgStringList &CmdArgs) {
auto DefaultTrivialAutoVarInit = TC.GetDefaultTrivialAutoVarInit();
StringRef TrivialAutoVarInit = "";
for (const Arg *A : Args) {
switch (A->getOption().getID()) {
default:
continue;
case options::OPT_ftrivial_auto_var_init: {
A->claim();
StringRef Val = A->getValue();
if (Val == "uninitialized" || Val == "zero" || Val == "pattern")
TrivialAutoVarInit = Val;
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
break;
}
}
}
if (TrivialAutoVarInit.empty())
switch (DefaultTrivialAutoVarInit) {
case LangOptions::TrivialAutoVarInitKind::Uninitialized:
break;
case LangOptions::TrivialAutoVarInitKind::Pattern:
TrivialAutoVarInit = "pattern";
break;
case LangOptions::TrivialAutoVarInitKind::Zero:
TrivialAutoVarInit = "zero";
break;
}
if (!TrivialAutoVarInit.empty()) {
CmdArgs.push_back(
Args.MakeArgString("-ftrivial-auto-var-init=" + TrivialAutoVarInit));
}
if (Arg *A =
Args.getLastArg(options::OPT_ftrivial_auto_var_init_stop_after)) {
if (!Args.hasArg(options::OPT_ftrivial_auto_var_init) ||
StringRef(
Args.getLastArg(options::OPT_ftrivial_auto_var_init)->getValue()) ==
"uninitialized")
D.Diag(diag::err_drv_trivial_auto_var_init_stop_after_missing_dependency);
A->claim();
StringRef Val = A->getValue();
if (std::stoi(Val.str()) <= 0)
D.Diag(diag::err_drv_trivial_auto_var_init_stop_after_invalid_value);
CmdArgs.push_back(
Args.MakeArgString("-ftrivial-auto-var-init-stop-after=" + Val));
}
}
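// Example for RenderTrivialAutoVarInitOptions above (illustrative):
// "-ftrivial-auto-var-init=pattern -ftrivial-auto-var-init-stop-after=100" is
// forwarded to cc1 unchanged; passing the stop-after form without a
// non-"uninitialized" -ftrivial-auto-var-init triggers a driver error.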
static void RenderOpenCLOptions(const ArgList &Args, ArgStringList &CmdArgs,
types::ID InputType) {
// cl-denorms-are-zero is not forwarded. It is translated into a generic flag
// for denormal flushing handling based on the target.
const unsigned ForwardedArguments[] = {
options::OPT_cl_opt_disable,
options::OPT_cl_strict_aliasing,
options::OPT_cl_single_precision_constant,
options::OPT_cl_finite_math_only,
options::OPT_cl_kernel_arg_info,
options::OPT_cl_unsafe_math_optimizations,
options::OPT_cl_fast_relaxed_math,
options::OPT_cl_mad_enable,
options::OPT_cl_no_signed_zeros,
options::OPT_cl_fp32_correctly_rounded_divide_sqrt,
options::OPT_cl_uniform_work_group_size
};
if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
std::string CLStdStr = std::string("-cl-std=") + A->getValue();
CmdArgs.push_back(Args.MakeArgString(CLStdStr));
} else if (Arg *A = Args.getLastArg(options::OPT_cl_ext_EQ)) {
std::string CLExtStr = std::string("-cl-ext=") + A->getValue();
CmdArgs.push_back(Args.MakeArgString(CLExtStr));
}
for (const auto &Arg : ForwardedArguments)
if (const auto *A = Args.getLastArg(Arg))
CmdArgs.push_back(Args.MakeArgString(A->getOption().getPrefixedName()));
// Only add the default headers if we are compiling OpenCL sources.
if ((types::isOpenCL(InputType) ||
(Args.hasArg(options::OPT_cl_std_EQ) && types::isSrcFile(InputType))) &&
!Args.hasArg(options::OPT_cl_no_stdinc)) {
CmdArgs.push_back("-finclude-default-header");
CmdArgs.push_back("-fdeclare-opencl-builtins");
}
}
static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs,
types::ID InputType) {
const unsigned ForwardedArguments[] = {options::OPT_dxil_validator_version,
options::OPT_D,
options::OPT_I,
options::OPT_S,
options::OPT_O,
options::OPT_emit_llvm,
options::OPT_emit_obj,
options::OPT_disable_llvm_passes,
options::OPT_fnative_half_type,
options::OPT_hlsl_entrypoint};
if (!types::isHLSL(InputType))
return;
for (const auto &Arg : ForwardedArguments)
if (const auto *A = Args.getLastArg(Arg))
A->renderAsInput(Args, CmdArgs);
// Add the default headers if dxc_no_stdinc is not set.
if (!Args.hasArg(options::OPT_dxc_no_stdinc) &&
!Args.hasArg(options::OPT_nostdinc))
CmdArgs.push_back("-finclude-default-header");
}
static void RenderARCMigrateToolOptions(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
bool ARCMTEnabled = false;
if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) {
if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check,
options::OPT_ccc_arcmt_modify,
options::OPT_ccc_arcmt_migrate)) {
ARCMTEnabled = true;
switch (A->getOption().getID()) {
default: llvm_unreachable("missed a case");
case options::OPT_ccc_arcmt_check:
CmdArgs.push_back("-arcmt-action=check");
break;
case options::OPT_ccc_arcmt_modify:
CmdArgs.push_back("-arcmt-action=modify");
break;
case options::OPT_ccc_arcmt_migrate:
CmdArgs.push_back("-arcmt-action=migrate");
CmdArgs.push_back("-mt-migrate-directory");
CmdArgs.push_back(A->getValue());
Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output);
Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors);
break;
}
}
} else {
Args.ClaimAllArgs(options::OPT_ccc_arcmt_check);
Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify);
Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate);
}
if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) {
if (ARCMTEnabled)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-ccc-arcmt-migrate";
CmdArgs.push_back("-mt-migrate-directory");
CmdArgs.push_back(A->getValue());
if (!Args.hasArg(options::OPT_objcmt_migrate_literals,
options::OPT_objcmt_migrate_subscripting,
options::OPT_objcmt_migrate_property)) {
// None specified, means enable them all.
CmdArgs.push_back("-objcmt-migrate-literals");
CmdArgs.push_back("-objcmt-migrate-subscripting");
CmdArgs.push_back("-objcmt-migrate-property");
} else {
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property);
}
} else {
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property_dot_syntax);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_allowlist_dir_path);
}
}
static void RenderBuiltinOptions(const ToolChain &TC, const llvm::Triple &T,
const ArgList &Args, ArgStringList &CmdArgs) {
// -fbuiltin is default unless -mkernel is used.
bool UseBuiltins =
Args.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin,
!Args.hasArg(options::OPT_mkernel));
if (!UseBuiltins)
CmdArgs.push_back("-fno-builtin");
// -ffreestanding implies -fno-builtin.
if (Args.hasArg(options::OPT_ffreestanding))
UseBuiltins = false;
// Process the -fno-builtin-* options.
for (const Arg *A : Args.filtered(options::OPT_fno_builtin_)) {
A->claim();
// If -fno-builtin is specified, then there's no need to pass the option to
// the frontend.
if (UseBuiltins)
A->render(Args, CmdArgs);
}
// le32-specific flags:
// -fno-math-builtin: clang should not convert math builtins to intrinsics
// by default.
if (TC.getArch() == llvm::Triple::le32)
CmdArgs.push_back("-fno-math-builtin");
}
bool Driver::getDefaultModuleCachePath(SmallVectorImpl<char> &Result) {
if (const char *Str = std::getenv("CLANG_MODULE_CACHE_PATH")) {
Twine Path{Str};
Path.toVector(Result);
return Path.getSingleStringRef() != "";
}
if (llvm::sys::path::cache_directory(Result)) {
llvm::sys::path::append(Result, "clang");
llvm::sys::path::append(Result, "ModuleCache");
return true;
}
return false;
}
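// On typical Linux hosts getDefaultModuleCachePath resolves to something like
// "$XDG_CACHE_HOME/clang/ModuleCache" (usually "~/.cache/clang/ModuleCache"),
// unless the CLANG_MODULE_CACHE_PATH environment variable overrides it.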
static bool RenderModulesOptions(Compilation &C, const Driver &D,
const ArgList &Args, const InputInfo &Input,
const InputInfo &Output, const Arg *Std,
ArgStringList &CmdArgs) {
bool IsCXX = types::isCXX(Input.getType());
// FIXME: Find a better way to determine whether the input has standard c++
// modules support by default.
bool HaveStdCXXModules =
IsCXX && Std &&
(Std->containsValue("c++2a") || Std->containsValue("c++20") ||
Std->containsValue("c++2b") || Std->containsValue("c++latest"));
bool HaveModules = HaveStdCXXModules;
// -fmodules enables the use of precompiled modules (off by default).
// Users can pass -fno-cxx-modules to turn off modules support for
// C++/Objective-C++ programs.
bool HaveClangModules = false;
if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) {
bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules,
options::OPT_fno_cxx_modules, true);
if (AllowedInCXX || !IsCXX) {
CmdArgs.push_back("-fmodules");
HaveClangModules = true;
}
}
HaveModules |= HaveClangModules;
if (Args.hasArg(options::OPT_fmodules_ts)) {
D.Diag(diag::warn_deprecated_fmodules_ts_flag);
CmdArgs.push_back("-fmodules-ts");
HaveModules = true;
}
// -fmodule-maps enables implicit reading of module map files. By default,
// this is enabled if we are using Clang's flavor of precompiled modules.
if (Args.hasFlag(options::OPT_fimplicit_module_maps,
options::OPT_fno_implicit_module_maps, HaveClangModules))
CmdArgs.push_back("-fimplicit-module-maps");
// -fmodules-decluse checks that any modules used are declared as used in the
// module map (off by default).
Args.addOptInFlag(CmdArgs, options::OPT_fmodules_decluse,
options::OPT_fno_modules_decluse);
// -fmodules-strict-decluse is like -fmodules-decluse, but also checks that
// all #included headers are part of modules.
if (Args.hasFlag(options::OPT_fmodules_strict_decluse,
options::OPT_fno_modules_strict_decluse, false))
CmdArgs.push_back("-fmodules-strict-decluse");
// -fno-implicit-modules turns off implicitly compiling modules on demand.
bool ImplicitModules = false;
if (!Args.hasFlag(options::OPT_fimplicit_modules,
options::OPT_fno_implicit_modules, HaveClangModules)) {
if (HaveModules)
CmdArgs.push_back("-fno-implicit-modules");
} else if (HaveModules) {
ImplicitModules = true;
// -fmodule-cache-path specifies where our implicitly-built module files
// should be written.
SmallString<128> Path;
if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path))
Path = A->getValue();
bool HasPath = true;
if (C.isForDiagnostics()) {
// When generating crash reports, we want to emit the modules along with
// the reproduction sources, so we ignore any provided module path.
Path = Output.getFilename();
llvm::sys::path::replace_extension(Path, ".cache");
llvm::sys::path::append(Path, "modules");
} else if (Path.empty()) {
// No module path was provided: use the default.
HasPath = Driver::getDefaultModuleCachePath(Path);
}
// `HasPath` will only be false if getDefaultModuleCachePath() fails.
// That being said, that failure is unlikely and not caching is harmless.
if (HasPath) {
const char Arg[] = "-fmodules-cache-path=";
Path.insert(Path.begin(), Arg, Arg + strlen(Arg));
CmdArgs.push_back(Args.MakeArgString(Path));
}
}
if (HaveModules) {
// -fprebuilt-module-path specifies where to load the prebuilt module files.
for (const Arg *A : Args.filtered(options::OPT_fprebuilt_module_path)) {
CmdArgs.push_back(Args.MakeArgString(
std::string("-fprebuilt-module-path=") + A->getValue()));
A->claim();
}
if (Args.hasFlag(options::OPT_fprebuilt_implicit_modules,
options::OPT_fno_prebuilt_implicit_modules, false))
CmdArgs.push_back("-fprebuilt-implicit-modules");
if (Args.hasFlag(options::OPT_fmodules_validate_input_files_content,
options::OPT_fno_modules_validate_input_files_content,
false))
CmdArgs.push_back("-fvalidate-ast-input-files-content");
}
// -fmodule-name specifies the module that is currently being built (or
// used for header checking by -fmodule-maps).
Args.AddLastArg(CmdArgs, options::OPT_fmodule_name_EQ);
// -fmodule-map-file can be used to specify files containing module
// definitions.
Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file);
// -fbuiltin-module-map can be used to load the clang
// builtin headers modulemap file.
if (Args.hasArg(options::OPT_fbuiltin_module_map)) {
SmallString<128> BuiltinModuleMap(D.ResourceDir);
llvm::sys::path::append(BuiltinModuleMap, "include");
llvm::sys::path::append(BuiltinModuleMap, "module.modulemap");
if (llvm::sys::fs::exists(BuiltinModuleMap))
CmdArgs.push_back(
Args.MakeArgString("-fmodule-map-file=" + BuiltinModuleMap));
}
// The -fmodule-file=<name>=<file> form specifies the mapping of module
// names to precompiled module files (the module is loaded only if used).
// The -fmodule-file=<file> form can be used to unconditionally load
// precompiled module files (whether used or not).
if (HaveModules)
Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file);
else
Args.ClaimAllArgs(options::OPT_fmodule_file);
// When building modules and generating crashdumps, we need to dump a module
// dependency VFS alongside the output.
if (HaveClangModules && C.isForDiagnostics()) {
SmallString<128> VFSDir(Output.getFilename());
llvm::sys::path::replace_extension(VFSDir, ".cache");
// Add the cache directory as a temp so the crash diagnostics pick it up.
C.addTempFile(Args.MakeArgString(VFSDir));
llvm::sys::path::append(VFSDir, "vfs");
CmdArgs.push_back("-module-dependency-dir");
CmdArgs.push_back(Args.MakeArgString(VFSDir));
}
if (HaveClangModules)
Args.AddLastArg(CmdArgs, options::OPT_fmodules_user_build_path);
// Pass through all -fmodules-ignore-macro arguments.
Args.AddAllArgs(CmdArgs, options::OPT_fmodules_ignore_macro);
Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval);
Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after);
if (HaveClangModules) {
Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp);
if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) {
if (Args.hasArg(options::OPT_fbuild_session_timestamp))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-fbuild-session-timestamp";
llvm::sys::fs::file_status Status;
if (llvm::sys::fs::status(A->getValue(), Status))
D.Diag(diag::err_drv_no_such_file) << A->getValue();
CmdArgs.push_back(Args.MakeArgString(
"-fbuild-session-timestamp=" +
Twine((uint64_t)std::chrono::duration_cast<std::chrono::seconds>(
Status.getLastModificationTime().time_since_epoch())
.count())));
}
if (Args.getLastArg(
options::OPT_fmodules_validate_once_per_build_session)) {
if (!Args.getLastArg(options::OPT_fbuild_session_timestamp,
options::OPT_fbuild_session_file))
D.Diag(diag::err_drv_modules_validate_once_requires_timestamp);
Args.AddLastArg(CmdArgs,
options::OPT_fmodules_validate_once_per_build_session);
}
if (Args.hasFlag(options::OPT_fmodules_validate_system_headers,
options::OPT_fno_modules_validate_system_headers,
ImplicitModules))
CmdArgs.push_back("-fmodules-validate-system-headers");
Args.AddLastArg(CmdArgs,
options::OPT_fmodules_disable_diagnostic_validation);
} else {
Args.ClaimAllArgs(options::OPT_fbuild_session_timestamp);
Args.ClaimAllArgs(options::OPT_fbuild_session_file);
Args.ClaimAllArgs(options::OPT_fmodules_validate_once_per_build_session);
Args.ClaimAllArgs(options::OPT_fmodules_validate_system_headers);
Args.ClaimAllArgs(options::OPT_fno_modules_validate_system_headers);
Args.ClaimAllArgs(options::OPT_fmodules_disable_diagnostic_validation);
}
// Claim `-fmodule-output` and `-fmodule-output=` to avoid unused warnings.
Args.ClaimAllArgs(options::OPT_fmodule_output);
Args.ClaimAllArgs(options::OPT_fmodule_output_EQ);
return HaveModules;
}
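// Example for RenderModulesOptions above (illustrative): "-fmodules" with a
// C++ input adds "-fmodules", "-fimplicit-module-maps" and, since implicit
// modules are enabled by default in that mode, a "-fmodules-cache-path="
// pointing at the default module cache.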
static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T,
ArgStringList &CmdArgs) {
// -fsigned-char is default.
if (const Arg *A = Args.getLastArg(options::OPT_fsigned_char,
options::OPT_fno_signed_char,
options::OPT_funsigned_char,
options::OPT_fno_unsigned_char)) {
if (A->getOption().matches(options::OPT_funsigned_char) ||
A->getOption().matches(options::OPT_fno_signed_char)) {
CmdArgs.push_back("-fno-signed-char");
}
} else if (!isSignedCharDefault(T)) {
CmdArgs.push_back("-fno-signed-char");
}
// The default depends on the language standard.
Args.AddLastArg(CmdArgs, options::OPT_fchar8__t, options::OPT_fno_char8__t);
if (const Arg *A = Args.getLastArg(options::OPT_fshort_wchar,
options::OPT_fno_short_wchar)) {
if (A->getOption().matches(options::OPT_fshort_wchar)) {
CmdArgs.push_back("-fwchar-type=short");
CmdArgs.push_back("-fno-signed-wchar");
} else {
bool IsARM = T.isARM() || T.isThumb() || T.isAArch64();
CmdArgs.push_back("-fwchar-type=int");
if (T.isOSzOS() ||
(IsARM && !(T.isOSWindows() || T.isOSNetBSD() || T.isOSOpenBSD())))
CmdArgs.push_back("-fno-signed-wchar");
else
CmdArgs.push_back("-fsigned-wchar");
}
}
}
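// Example for RenderCharacterOptions above (illustrative): "-fshort-wchar"
// yields "-fwchar-type=short -fno-signed-wchar"; "-fno-short-wchar" on a
// bare-metal AArch64 triple yields "-fwchar-type=int -fno-signed-wchar",
// since wchar_t is unsigned there outside of Windows, NetBSD and OpenBSD.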
static void RenderObjCOptions(const ToolChain &TC, const Driver &D,
const llvm::Triple &T, const ArgList &Args,
ObjCRuntime &Runtime, bool InferCovariantReturns,
const InputInfo &Input, ArgStringList &CmdArgs) {
const llvm::Triple::ArchType Arch = TC.getArch();
// -fobjc-dispatch-method is only relevant with the non-fragile ABI, and
// legacy is the default. Except for a deployment target of 10.5, the NeXT
// runtime always uses legacy dispatch, and -fno-objc-legacy-dispatch is
// silently ignored.
if (Runtime.isNonFragile()) {
if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch,
options::OPT_fno_objc_legacy_dispatch,
Runtime.isLegacyDispatchDefaultForArch(Arch))) {
if (TC.UseObjCMixedDispatch())
CmdArgs.push_back("-fobjc-dispatch-method=mixed");
else
CmdArgs.push_back("-fobjc-dispatch-method=non-legacy");
}
}
// When ObjectiveC legacy runtime is in effect on MacOSX, turn on the option
// to do Array/Dictionary subscripting by default.
if (Arch == llvm::Triple::x86 && T.isMacOSX() &&
Runtime.getKind() == ObjCRuntime::FragileMacOSX && Runtime.isNeXTFamily())
CmdArgs.push_back("-fobjc-subscripting-legacy-runtime");
// Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc.
// NOTE: This logic is duplicated in ToolChains.cpp.
if (isObjCAutoRefCount(Args)) {
TC.CheckObjCARC();
CmdArgs.push_back("-fobjc-arc");
// FIXME: It seems like this entire block, and several around it should be
// wrapped in isObjC, but for now we just use it here as this is where it
// was being used previously.
if (types::isCXX(Input.getType()) && types::isObjC(Input.getType())) {
if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
CmdArgs.push_back("-fobjc-arc-cxxlib=libc++");
else
CmdArgs.push_back("-fobjc-arc-cxxlib=libstdc++");
}
// Allow the user to enable full exceptions code emission.
// We default off for Objective-C, on for Objective-C++.
if (Args.hasFlag(options::OPT_fobjc_arc_exceptions,
options::OPT_fno_objc_arc_exceptions,
/*Default=*/types::isCXX(Input.getType())))
CmdArgs.push_back("-fobjc-arc-exceptions");
}
// Silence warning for full exception code emission options when explicitly
// set to use no ARC.
if (Args.hasArg(options::OPT_fno_objc_arc)) {
Args.ClaimAllArgs(options::OPT_fobjc_arc_exceptions);
Args.ClaimAllArgs(options::OPT_fno_objc_arc_exceptions);
}
// Allow the user to control whether messages can be converted to runtime
// functions.
if (types::isObjC(Input.getType())) {
auto *Arg = Args.getLastArg(
options::OPT_fobjc_convert_messages_to_runtime_calls,
options::OPT_fno_objc_convert_messages_to_runtime_calls);
if (Arg &&
Arg->getOption().matches(
options::OPT_fno_objc_convert_messages_to_runtime_calls))
CmdArgs.push_back("-fno-objc-convert-messages-to-runtime-calls");
}
// -fobjc-infer-related-result-type is the default, except in the Objective-C
// rewriter.
if (InferCovariantReturns)
CmdArgs.push_back("-fno-objc-infer-related-result-type");
// Pass down -fobjc-weak or -fno-objc-weak if present.
if (types::isObjC(Input.getType())) {
auto WeakArg =
Args.getLastArg(options::OPT_fobjc_weak, options::OPT_fno_objc_weak);
if (!WeakArg) {
// nothing to do
} else if (!Runtime.allowsWeak()) {
if (WeakArg->getOption().matches(options::OPT_fobjc_weak))
D.Diag(diag::err_objc_weak_unsupported);
} else {
WeakArg->render(Args, CmdArgs);
}
}
if (Args.hasArg(options::OPT_fobjc_disable_direct_methods_for_testing))
CmdArgs.push_back("-fobjc-disable-direct-methods-for-testing");
}
static void RenderDiagnosticsOptions(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
bool CaretDefault = true;
bool ColumnDefault = true;
if (const Arg *A = Args.getLastArg(options::OPT__SLASH_diagnostics_classic,
options::OPT__SLASH_diagnostics_column,
options::OPT__SLASH_diagnostics_caret)) {
switch (A->getOption().getID()) {
case options::OPT__SLASH_diagnostics_caret:
CaretDefault = true;
ColumnDefault = true;
break;
case options::OPT__SLASH_diagnostics_column:
CaretDefault = false;
ColumnDefault = true;
break;
case options::OPT__SLASH_diagnostics_classic:
CaretDefault = false;
ColumnDefault = false;
break;
}
}
// -fcaret-diagnostics is default.
if (!Args.hasFlag(options::OPT_fcaret_diagnostics,
options::OPT_fno_caret_diagnostics, CaretDefault))
CmdArgs.push_back("-fno-caret-diagnostics");
Args.addOptOutFlag(CmdArgs, options::OPT_fdiagnostics_fixit_info,
options::OPT_fno_diagnostics_fixit_info);
Args.addOptOutFlag(CmdArgs, options::OPT_fdiagnostics_show_option,
options::OPT_fno_diagnostics_show_option);
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) {
CmdArgs.push_back("-fdiagnostics-show-category");
CmdArgs.push_back(A->getValue());
}
Args.addOptInFlag(CmdArgs, options::OPT_fdiagnostics_show_hotness,
options::OPT_fno_diagnostics_show_hotness);
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ)) {
std::string Opt =
std::string("-fdiagnostics-hotness-threshold=") + A->getValue();
CmdArgs.push_back(Args.MakeArgString(Opt));
}
if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) {
CmdArgs.push_back("-fdiagnostics-format");
CmdArgs.push_back(A->getValue());
if (StringRef(A->getValue()) == "sarif" ||
StringRef(A->getValue()) == "SARIF")
D.Diag(diag::warn_drv_sarif_format_unstable);
}
if (const Arg *A = Args.getLastArg(
options::OPT_fdiagnostics_show_note_include_stack,
options::OPT_fno_diagnostics_show_note_include_stack)) {
const Option &O = A->getOption();
if (O.matches(options::OPT_fdiagnostics_show_note_include_stack))
CmdArgs.push_back("-fdiagnostics-show-note-include-stack");
else
CmdArgs.push_back("-fno-diagnostics-show-note-include-stack");
}
// Color diagnostics are parsed by the driver directly from argv and later
// re-parsed to construct this job; claim any possible color diagnostic here
// to avoid warn_drv_unused_argument and diagnose bad
// OPT_fdiagnostics_color_EQ values.
Args.getLastArg(options::OPT_fcolor_diagnostics,
options::OPT_fno_color_diagnostics);
if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_color_EQ)) {
StringRef Value(A->getValue());
if (Value != "always" && Value != "never" && Value != "auto")
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Value << A->getOption().getName();
}
if (D.getDiags().getDiagnosticOptions().ShowColors)
CmdArgs.push_back("-fcolor-diagnostics");
if (Args.hasArg(options::OPT_fansi_escape_codes))
CmdArgs.push_back("-fansi-escape-codes");
Args.addOptOutFlag(CmdArgs, options::OPT_fshow_source_location,
options::OPT_fno_show_source_location);
if (Args.hasArg(options::OPT_fdiagnostics_absolute_paths))
CmdArgs.push_back("-fdiagnostics-absolute-paths");
if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column,
ColumnDefault))
CmdArgs.push_back("-fno-show-column");
Args.addOptOutFlag(CmdArgs, options::OPT_fspell_checking,
options::OPT_fno_spell_checking);
}
DwarfFissionKind tools::getDebugFissionKind(const Driver &D,
const ArgList &Args, Arg *&Arg) {
Arg = Args.getLastArg(options::OPT_gsplit_dwarf, options::OPT_gsplit_dwarf_EQ,
options::OPT_gno_split_dwarf);
if (!Arg || Arg->getOption().matches(options::OPT_gno_split_dwarf))
return DwarfFissionKind::None;
if (Arg->getOption().matches(options::OPT_gsplit_dwarf))
return DwarfFissionKind::Split;
StringRef Value = Arg->getValue();
if (Value == "split")
return DwarfFissionKind::Split;
if (Value == "single")
return DwarfFissionKind::Single;
D.Diag(diag::err_drv_unsupported_option_argument)
<< Arg->getSpelling() << Arg->getValue();
return DwarfFissionKind::None;
}
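// getDebugFissionKind above returns Split for -gsplit-dwarf and
// -gsplit-dwarf=split, Single for -gsplit-dwarf=single, and None when the
// last relevant flag is -gno-split-dwarf, absent, or carries an unrecognized
// value (which is also diagnosed).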
static void renderDwarfFormat(const Driver &D, const llvm::Triple &T,
const ArgList &Args, ArgStringList &CmdArgs,
unsigned DwarfVersion) {
auto *DwarfFormatArg =
Args.getLastArg(options::OPT_gdwarf64, options::OPT_gdwarf32);
if (!DwarfFormatArg)
return;
if (DwarfFormatArg->getOption().matches(options::OPT_gdwarf64)) {
if (DwarfVersion < 3)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "DWARFv3 or greater";
else if (!T.isArch64Bit())
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "64 bit architecture";
else if (!T.isOSBinFormatELF())
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "ELF platforms";
}
DwarfFormatArg->render(Args, CmdArgs);
}
static void renderDebugOptions(const ToolChain &TC, const Driver &D,
const llvm::Triple &T, const ArgList &Args,
bool EmitCodeView, bool IRInput,
ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind &DebugInfoKind,
DwarfFissionKind &DwarfFission) {
if (Args.hasFlag(options::OPT_fdebug_info_for_profiling,
options::OPT_fno_debug_info_for_profiling, false) &&
checkDebugInfoOption(
Args.getLastArg(options::OPT_fdebug_info_for_profiling), Args, D, TC))
CmdArgs.push_back("-fdebug-info-for-profiling");
// The 'g' groups options involve a somewhat intricate sequence of decisions
// about what to pass from the driver to the frontend, but by the time they
// reach cc1 they've been factored into three well-defined orthogonal choices:
// * what level of debug info to generate
// * what dwarf version to write
// * what debugger tuning to use
// This avoids having to monkey around further in cc1 other than to disable
// codeview if not running in a Windows environment. Perhaps even that
// decision should be made in the driver as well though.
llvm::DebuggerKind DebuggerTuning = TC.getDefaultDebuggerTuning();
bool SplitDWARFInlining =
Args.hasFlag(options::OPT_fsplit_dwarf_inlining,
options::OPT_fno_split_dwarf_inlining, false);
// Normally -gsplit-dwarf is only useful with -gN. For IR input, Clang does
// object file generation and no IR generation, so -gN should not be needed.
// Therefore allow -gsplit-dwarf with either -gN or IR input.
if (IRInput || Args.hasArg(options::OPT_g_Group)) {
Arg *SplitDWARFArg;
DwarfFission = getDebugFissionKind(D, Args, SplitDWARFArg);
if (DwarfFission != DwarfFissionKind::None &&
!checkDebugInfoOption(SplitDWARFArg, Args, D, TC)) {
DwarfFission = DwarfFissionKind::None;
SplitDWARFInlining = false;
}
}
if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
DebugInfoKind = codegenoptions::DebugInfoConstructor;
// If the last option explicitly specified a debug-info level, use it.
if (checkDebugInfoOption(A, Args, D, TC) &&
A->getOption().matches(options::OPT_gN_Group)) {
DebugInfoKind = DebugLevelToInfoKind(*A);
// For -g0 or -gline-tables-only, drop -gsplit-dwarf. This gets a bit more
// complicated if you've disabled inline info in the skeleton CUs
// (SplitDWARFInlining) - then there's value in composing split-dwarf and
// line-tables-only, so let those compose naturally in that case.
if (DebugInfoKind == codegenoptions::NoDebugInfo ||
DebugInfoKind == codegenoptions::DebugDirectivesOnly ||
(DebugInfoKind == codegenoptions::DebugLineTablesOnly &&
SplitDWARFInlining))
DwarfFission = DwarfFissionKind::None;
}
}
// If a debugger tuning argument appeared, remember it.
bool HasDebuggerTuning = false;
if (const Arg *A =
Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) {
HasDebuggerTuning = true;
if (checkDebugInfoOption(A, Args, D, TC)) {
if (A->getOption().matches(options::OPT_glldb))
DebuggerTuning = llvm::DebuggerKind::LLDB;
else if (A->getOption().matches(options::OPT_gsce))
DebuggerTuning = llvm::DebuggerKind::SCE;
else if (A->getOption().matches(options::OPT_gdbx))
DebuggerTuning = llvm::DebuggerKind::DBX;
else
DebuggerTuning = llvm::DebuggerKind::GDB;
}
}
// If a -gdwarf argument appeared, remember it.
bool EmitDwarf = false;
if (const Arg *A = getDwarfNArg(Args))
EmitDwarf = checkDebugInfoOption(A, Args, D, TC);
if (const Arg *A = Args.getLastArg(options::OPT_gcodeview))
EmitCodeView = checkDebugInfoOption(A, Args, D, TC);
// If the user asked for debug info but did not explicitly specify -gcodeview
// or -gdwarf, ask the toolchain for the default format.
if (!EmitCodeView && !EmitDwarf &&
DebugInfoKind != codegenoptions::NoDebugInfo) {
switch (TC.getDefaultDebugFormat()) {
case codegenoptions::DIF_CodeView:
EmitCodeView = true;
break;
case codegenoptions::DIF_DWARF:
EmitDwarf = true;
break;
}
}
unsigned RequestedDWARFVersion = 0; // DWARF version requested by the user
unsigned EffectiveDWARFVersion = 0; // DWARF version TC can generate. It may
// be lower than what the user wanted.
if (EmitDwarf) {
RequestedDWARFVersion = getDwarfVersion(TC, Args);
// Clamp effective DWARF version to the max supported by the toolchain.
EffectiveDWARFVersion =
std::min(RequestedDWARFVersion, TC.getMaxDwarfVersion());
} else {
Args.ClaimAllArgs(options::OPT_fdebug_default_version);
}
// -gline-directives-only is supported only for DWARF debug info.
if (RequestedDWARFVersion == 0 &&
DebugInfoKind == codegenoptions::DebugDirectivesOnly)
DebugInfoKind = codegenoptions::NoDebugInfo;
// Strict DWARF defaults to false, but for DBX tuning we need it to default
// to true.
if (const Arg *A = Args.getLastArg(options::OPT_gstrict_dwarf))
(void)checkDebugInfoOption(A, Args, D, TC);
if (Args.hasFlag(options::OPT_gstrict_dwarf, options::OPT_gno_strict_dwarf,
DebuggerTuning == llvm::DebuggerKind::DBX))
CmdArgs.push_back("-gstrict-dwarf");
// And we handle flag -grecord-gcc-switches later with DWARFDebugFlags.
Args.ClaimAllArgs(options::OPT_g_flags_Group);
// Column info is included by default for everything except SCE and
// CodeView. Clang doesn't track end columns, just starting columns, which,
// in theory, is fine for CodeView (and PDB). In practice, however, the
// Microsoft debuggers don't handle missing end columns well, and the AIX
// debugger DBX also doesn't handle the columns well, so it's better not to
// include any column info.
if (const Arg *A = Args.getLastArg(options::OPT_gcolumn_info))
(void)checkDebugInfoOption(A, Args, D, TC);
if (!Args.hasFlag(options::OPT_gcolumn_info, options::OPT_gno_column_info,
!EmitCodeView &&
(DebuggerTuning != llvm::DebuggerKind::SCE &&
DebuggerTuning != llvm::DebuggerKind::DBX)))
CmdArgs.push_back("-gno-column-info");
// FIXME: Move backend command line options to the module.
if (Args.hasFlag(options::OPT_gmodules, options::OPT_gno_modules, false)) {
// If -gline-tables-only or -gline-directives-only is the last option it
// wins.
if (checkDebugInfoOption(Args.getLastArg(options::OPT_gmodules), Args, D,
TC)) {
if (DebugInfoKind != codegenoptions::DebugLineTablesOnly &&
DebugInfoKind != codegenoptions::DebugDirectivesOnly) {
DebugInfoKind = codegenoptions::DebugInfoConstructor;
CmdArgs.push_back("-dwarf-ext-refs");
CmdArgs.push_back("-fmodule-format=obj");
}
}
}
if (T.isOSBinFormatELF() && SplitDWARFInlining)
CmdArgs.push_back("-fsplit-dwarf-inlining");
// After we've dealt with all combinations of things that could
// make DebugInfoKind be other than None or DebugLineTablesOnly,
// figure out if we need to "upgrade" it to standalone debug info.
// We parse these two '-f' options whether or not they will be used,
// to claim them even if you wrote "-fstandalone-debug -gline-tables-only"
bool NeedFullDebug = Args.hasFlag(
options::OPT_fstandalone_debug, options::OPT_fno_standalone_debug,
DebuggerTuning == llvm::DebuggerKind::LLDB ||
TC.GetDefaultStandaloneDebug());
if (const Arg *A = Args.getLastArg(options::OPT_fstandalone_debug))
(void)checkDebugInfoOption(A, Args, D, TC);
if (DebugInfoKind == codegenoptions::LimitedDebugInfo ||
DebugInfoKind == codegenoptions::DebugInfoConstructor) {
if (Args.hasFlag(options::OPT_fno_eliminate_unused_debug_types,
options::OPT_feliminate_unused_debug_types, false))
DebugInfoKind = codegenoptions::UnusedTypeInfo;
else if (NeedFullDebug)
DebugInfoKind = codegenoptions::FullDebugInfo;
}
if (Args.hasFlag(options::OPT_gembed_source, options::OPT_gno_embed_source,
false)) {
// Source embedding is a vendor extension to DWARF v5. By now we have
// checked if a DWARF version was stated explicitly, and have otherwise
// fallen back to the target default, so if this is still not at least 5
// we emit an error.
const Arg *A = Args.getLastArg(options::OPT_gembed_source);
if (RequestedDWARFVersion < 5)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< A->getAsString(Args) << "-gdwarf-5";
else if (EffectiveDWARFVersion < 5)
// The toolchain has reduced the allowed DWARF version, so we can't enable
// -gembed-source.
D.Diag(diag::warn_drv_dwarf_version_limited_by_target)
<< A->getAsString(Args) << TC.getTripleString() << 5
<< EffectiveDWARFVersion;
else if (checkDebugInfoOption(A, Args, D, TC))
CmdArgs.push_back("-gembed-source");
}
if (EmitCodeView) {
CmdArgs.push_back("-gcodeview");
Args.addOptInFlag(CmdArgs, options::OPT_gcodeview_ghash,
options::OPT_gno_codeview_ghash);
Args.addOptOutFlag(CmdArgs, options::OPT_gcodeview_command_line,
options::OPT_gno_codeview_command_line);
}
Args.addOptOutFlag(CmdArgs, options::OPT_ginline_line_tables,
options::OPT_gno_inline_line_tables);
// When emitting remarks, we need at least debug lines in the output.
if (willEmitRemarks(Args) &&
DebugInfoKind <= codegenoptions::DebugDirectivesOnly)
DebugInfoKind = codegenoptions::DebugLineTablesOnly;
// Adjust the debug info kind for the given toolchain.
TC.adjustDebugInfoKind(DebugInfoKind, Args);
// On AIX, the debugger tuning option can be omitted if it is not explicitly
// set.
RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, EffectiveDWARFVersion,
T.isOSAIX() && !HasDebuggerTuning
? llvm::DebuggerKind::Default
: DebuggerTuning);
// -fdebug-macro turns on macro debug info generation.
if (Args.hasFlag(options::OPT_fdebug_macro, options::OPT_fno_debug_macro,
false))
if (checkDebugInfoOption(Args.getLastArg(options::OPT_fdebug_macro), Args,
D, TC))
CmdArgs.push_back("-debug-info-macro");
// -ggnu-pubnames turns on gnu style pubnames in the backend.
const auto *PubnamesArg =
Args.getLastArg(options::OPT_ggnu_pubnames, options::OPT_gno_gnu_pubnames,
options::OPT_gpubnames, options::OPT_gno_pubnames);
if (DwarfFission != DwarfFissionKind::None ||
(PubnamesArg && checkDebugInfoOption(PubnamesArg, Args, D, TC)))
if (!PubnamesArg ||
(!PubnamesArg->getOption().matches(options::OPT_gno_gnu_pubnames) &&
!PubnamesArg->getOption().matches(options::OPT_gno_pubnames)))
CmdArgs.push_back(PubnamesArg && PubnamesArg->getOption().matches(
options::OPT_gpubnames)
? "-gpubnames"
: "-ggnu-pubnames");
const auto *SimpleTemplateNamesArg =
Args.getLastArg(options::OPT_gsimple_template_names,
options::OPT_gno_simple_template_names);
bool ForwardTemplateParams = DebuggerTuning == llvm::DebuggerKind::SCE;
if (SimpleTemplateNamesArg &&
checkDebugInfoOption(SimpleTemplateNamesArg, Args, D, TC)) {
const auto &Opt = SimpleTemplateNamesArg->getOption();
if (Opt.matches(options::OPT_gsimple_template_names)) {
ForwardTemplateParams = true;
CmdArgs.push_back("-gsimple-template-names=simple");
}
}
if (const Arg *A = Args.getLastArg(options::OPT_gsrc_hash_EQ)) {
StringRef v = A->getValue();
CmdArgs.push_back(Args.MakeArgString("-gsrc-hash=" + v));
}
Args.addOptInFlag(CmdArgs, options::OPT_fdebug_ranges_base_address,
options::OPT_fno_debug_ranges_base_address);
// -gdwarf-aranges turns on the emission of the aranges section in the
// backend.
// Always enabled for SCE tuning.
bool NeedAranges = DebuggerTuning == llvm::DebuggerKind::SCE;
if (const Arg *A = Args.getLastArg(options::OPT_gdwarf_aranges))
NeedAranges = checkDebugInfoOption(A, Args, D, TC) || NeedAranges;
if (NeedAranges) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-generate-arange-section");
}
Args.addOptInFlag(CmdArgs, options::OPT_fforce_dwarf_frame,
options::OPT_fno_force_dwarf_frame);
if (Args.hasFlag(options::OPT_fdebug_types_section,
options::OPT_fno_debug_types_section, false)) {
if (!(T.isOSBinFormatELF() || T.isOSBinFormatWasm())) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_fdebug_types_section)
->getAsString(Args)
<< T.getTriple();
} else if (checkDebugInfoOption(
Args.getLastArg(options::OPT_fdebug_types_section), Args, D,
TC)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-generate-type-units");
}
}
// To avoid join/split of directory+filename, the integrated assembler prefers
// the directory form of .file on all DWARF versions. GNU as doesn't allow the
// form before DWARF v5.
if (!Args.hasFlag(options::OPT_fdwarf_directory_asm,
options::OPT_fno_dwarf_directory_asm,
TC.useIntegratedAs() || EffectiveDWARFVersion >= 5))
CmdArgs.push_back("-fno-dwarf-directory-asm");
// Decide how to render forward declarations of template instantiations.
// SCE wants full descriptions, others just get them in the name.
if (ForwardTemplateParams)
CmdArgs.push_back("-debug-forward-template-params");
// Do we need to explicitly import anonymous namespaces into the parent
// scope?
if (DebuggerTuning == llvm::DebuggerKind::SCE)
CmdArgs.push_back("-dwarf-explicit-import");
renderDwarfFormat(D, T, Args, CmdArgs, EffectiveDWARFVersion);
RenderDebugInfoCompressionArgs(Args, CmdArgs, D, TC);
}
static void ProcessVSRuntimeLibrary(const ArgList &Args,
ArgStringList &CmdArgs) {
unsigned RTOptionID = options::OPT__SLASH_MT;
if (Args.hasArg(options::OPT__SLASH_LDd))
// The /LDd option implies /MTd. The dependent lib part can be overridden,
// but defining _DEBUG is sticky.
RTOptionID = options::OPT__SLASH_MTd;
if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group))
RTOptionID = A->getOption().getID();
if (Arg *A = Args.getLastArg(options::OPT_fms_runtime_lib_EQ)) {
RTOptionID = llvm::StringSwitch<unsigned>(A->getValue())
.Case("static", options::OPT__SLASH_MT)
.Case("static_dbg", options::OPT__SLASH_MTd)
.Case("dll", options::OPT__SLASH_MD)
.Case("dll_dbg", options::OPT__SLASH_MDd)
.Default(options::OPT__SLASH_MT);
}
StringRef FlagForCRT;
switch (RTOptionID) {
case options::OPT__SLASH_MD:
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-D_DLL");
FlagForCRT = "--dependent-lib=msvcrt";
break;
case options::OPT__SLASH_MDd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-D_DLL");
FlagForCRT = "--dependent-lib=msvcrtd";
break;
case options::OPT__SLASH_MT:
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-flto-visibility-public-std");
FlagForCRT = "--dependent-lib=libcmt";
break;
case options::OPT__SLASH_MTd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-flto-visibility-public-std");
FlagForCRT = "--dependent-lib=libcmtd";
break;
default:
llvm_unreachable("Unexpected option ID.");
}
if (Args.hasArg(options::OPT_fms_omit_default_lib)) {
CmdArgs.push_back("-D_VC_NODEFAULTLIB");
} else {
CmdArgs.push_back(FlagForCRT.data());
// This provides POSIX compatibility (maps 'open' to '_open'), which most
// users want. The /Za flag to cl.exe turns this off, but it's not
// implemented in clang.
CmdArgs.push_back("--dependent-lib=oldnames");
}
}
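// Build the -cc1 (frontend) invocation for this job: classify the inputs,
// select the frontend action, and translate the driver arguments into cc1
// options.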
void Clang::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const ArgList &Args, const char *LinkingOutput) const {
const auto &TC = getToolChain();
const llvm::Triple &RawTriple = TC.getTriple();
const llvm::Triple &Triple = TC.getEffectiveTriple();
const std::string &TripleStr = Triple.getTriple();
bool KernelOrKext =
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
const Driver &D = TC.getDriver();
ArgStringList CmdArgs;
assert(Inputs.size() >= 1 && "Must have at least one input.");
// CUDA/HIP compilation may have multiple inputs (source file + results of
// device-side compilations). OpenMP device jobs also take the host IR as a
// second input. Module precompilation accepts a list of header files to
// include as part of the module. API extraction accepts a list of header
// files whose API information is emitted in the output. All other jobs are
// expected to have exactly one input.
bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
bool IsHIP = JA.isOffloading(Action::OFK_HIP);
bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
bool IsExtractAPI = isa<ExtractAPIJobAction>(JA);
bool IsDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_Host));
bool IsHostOffloadingAction =
JA.isHostOffloading(Action::OFK_OpenMP) ||
(JA.isHostOffloading(C.getActiveOffloadKinds()) &&
Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false));
bool IsRDCMode =
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
bool IsUsingLTO = D.isUsingLTO(IsDeviceOffloadAction);
auto LTOMode = D.getLTOMode(IsDeviceOffloadAction);
// Extract API doesn't have a main input file, so invent a fake one as a
// placeholder.
InputInfo ExtractAPIPlaceholderInput(Inputs[0].getType(), "extract-api",
"extract-api");
const InputInfo &Input =
IsExtractAPI ? ExtractAPIPlaceholderInput : Inputs[0];
InputInfoList ExtractAPIInputs;
InputInfoList HostOffloadingInputs;
const InputInfo *CudaDeviceInput = nullptr;
const InputInfo *OpenMPDeviceInput = nullptr;
for (const InputInfo &I : Inputs) {
if (&I == &Input || I.getType() == types::TY_Nothing) {
// This is the primary input or contains nothing.
} else if (IsExtractAPI) {
auto ExpectedInputType = ExtractAPIPlaceholderInput.getType();
if (I.getType() != ExpectedInputType) {
D.Diag(diag::err_drv_extract_api_wrong_kind)
<< I.getFilename() << types::getTypeName(I.getType())
<< types::getTypeName(ExpectedInputType);
}
ExtractAPIInputs.push_back(I);
} else if (IsHostOffloadingAction) {
HostOffloadingInputs.push_back(I);
} else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
CudaDeviceInput = &I;
} else if (IsOpenMPDevice && !OpenMPDeviceInput) {
OpenMPDeviceInput = &I;
} else {
llvm_unreachable("unexpectedly given multiple inputs");
}
}
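// For CUDA/HIP device compilations the toolchain's auxiliary triple describes
// the host environment; it is used below to decide whether MSVC-specific
// behavior is needed.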
const llvm::Triple *AuxTriple =
(IsCuda || IsHIP) ? TC.getAuxTriple() : nullptr;
bool IsWindowsMSVC = RawTriple.isWindowsMSVCEnvironment();
bool IsIAMCU = RawTriple.isOSIAMCU();
// Adjust IsWindowsXYZ for CUDA/HIP compilations. Even when compiling in
// device mode (i.e., getToolchain().getTriple() is NVPTX/AMDGCN, not
// Windows), we need to pass Windows-specific flags to cc1.
if (IsCuda || IsHIP)
IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment();
// C++ is not supported for IAMCU.
if (IsIAMCU && types::isCXX(Input.getType()))
D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU";
// Invoke ourselves in -cc1 mode.
//
// FIXME: Implement custom jobs for internal actions.
CmdArgs.push_back("-cc1");
// Add the "effective" target triple.
CmdArgs.push_back("-triple");
CmdArgs.push_back(Args.MakeArgString(TripleStr));
if (const Arg *MJ = Args.getLastArg(options::OPT_MJ)) {
DumpCompilationDatabase(C, MJ->getValue(), TripleStr, Output, Input, Args);
Args.ClaimAllArgs(options::OPT_MJ);
} else if (const Arg *GenCDBFragment =
Args.getLastArg(options::OPT_gen_cdb_fragment_path)) {
DumpCompilationDatabaseFragmentToDir(GenCDBFragment->getValue(), C,
TripleStr, Output, Input, Args);
Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
}
if (IsCuda || IsHIP) {
// We have to pass the triple of the host if compiling for a CUDA/HIP device
// and vice-versa.
std::string NormalizedTriple;
if (JA.isDeviceOffloading(Action::OFK_Cuda) ||
JA.isDeviceOffloading(Action::OFK_HIP))
NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Host>()
->getTriple()
.normalize();
else {
// Host-side compilation.
NormalizedTriple =
(IsCuda ? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
: C.getSingleOffloadToolChain<Action::OFK_HIP>())
->getTriple()
.normalize();
if (IsCuda) {
// We need to figure out which CUDA version we're compiling for, as that
// determines how we load and launch GPU kernels.
auto *CTC = static_cast<const toolchains::CudaToolChain *>(
C.getSingleOffloadToolChain<Action::OFK_Cuda>());
assert(CTC && "Expected valid CUDA Toolchain.");
if (CTC && CTC->CudaInstallation.version() != CudaVersion::UNKNOWN)
CmdArgs.push_back(Args.MakeArgString(
Twine("-target-sdk-version=") +
CudaVersionToString(CTC->CudaInstallation.version())));
}
}
CmdArgs.push_back("-aux-triple");
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
}
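// -fsycl puts the frontend into SYCL device mode; default the SYCL standard
// version to 2020 unless -sycl-std= was given explicitly.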
if (Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) {
CmdArgs.push_back("-fsycl-is-device");
if (Arg *A = Args.getLastArg(options::OPT_sycl_std_EQ)) {
A->render(Args, CmdArgs);
} else {
// Ensure the default version in SYCL mode is 2020.
CmdArgs.push_back("-sycl-std=2020");
}
}
if (IsOpenMPDevice) {
// We have to pass the triple of the host if compiling for an OpenMP device.
std::string NormalizedTriple =
C.getSingleOffloadToolChain<Action::OFK_Host>()
->getTriple()
.normalize();
CmdArgs.push_back("-aux-triple");
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
}
if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
Triple.getArch() == llvm::Triple::thumb)) {
unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
unsigned Version = 0;
bool Failure =
Triple.getArchName().substr(Offset).consumeInteger(10, Version);
if (Failure || Version < 7)
D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName()
<< TripleStr;
}
// Push all default warning arguments that are specific to
// the given target. These come before any user-provided warning options.
TC.addClangWarningOptions(CmdArgs);
// FIXME: Subclass ToolChain for SPIR and move this to addClangWarningOptions.
if (Triple.isSPIR() || Triple.isSPIRV())
CmdArgs.push_back("-Wspir-compat");
// Select the appropriate action.
RewriteKind rewriteKind = RK_None;
// If CollectArgsForIntegratedAssembler() isn't called below, claim the args
// it claims when not running an assembler. Otherwise, clang would emit
// "argument unused" warnings for assembler flags when e.g. adding "-E" to
// flags while debugging something. That'd be somewhat inconvenient, and it's
// also inconsistent with most other flags -- we don't warn on
// -ffunction-sections not being used in -E mode either for example, even
// though it's not really used either.
if (!isa<AssembleJobAction>(JA)) {
// The args claimed here should match the args used in
// CollectArgsForIntegratedAssembler().
if (TC.useIntegratedAs()) {
Args.ClaimAllArgs(options::OPT_mrelax_all);
Args.ClaimAllArgs(options::OPT_mno_relax_all);
Args.ClaimAllArgs(options::OPT_mincremental_linker_compatible);
Args.ClaimAllArgs(options::OPT_mno_incremental_linker_compatible);
switch (C.getDefaultToolChain().getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
Args.ClaimAllArgs(options::OPT_mimplicit_it_EQ);
break;
default:
break;
}
}
Args.ClaimAllArgs(options::OPT_Wa_COMMA);
Args.ClaimAllArgs(options::OPT_Xassembler);
Args.ClaimAllArgs(options::OPT_femit_dwarf_unwind_EQ);
}
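// Select the frontend action for this job kind (analysis, migration,
// preprocessing, assembling, precompiling, API extraction, or code
// generation).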
if (isa<AnalyzeJobAction>(JA)) {
assert(JA.getType() == types::TY_Plist && "Invalid output type.");
CmdArgs.push_back("-analyze");
} else if (isa<MigrateJobAction>(JA)) {
CmdArgs.push_back("-migrate");
} else if (isa<PreprocessJobAction>(JA)) {
if (Output.getType() == types::TY_Dependencies)
CmdArgs.push_back("-Eonly");
else {
CmdArgs.push_back("-E");
if (Args.hasArg(options::OPT_rewrite_objc) &&
!Args.hasArg(options::OPT_g_Group))
CmdArgs.push_back("-P");
else if (JA.getType() == types::TY_PP_CXXHeaderUnit)
CmdArgs.push_back("-fdirectives-only");
}
} else if (isa<AssembleJobAction>(JA)) {
CmdArgs.push_back("-emit-obj");
CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D);
// Also ignore explicit -force_cpusubtype_ALL option.
(void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
} else if (isa<PrecompileJobAction>(JA)) {
if (JA.getType() == types::TY_Nothing)
CmdArgs.push_back("-fsyntax-only");
else if (JA.getType() == types::TY_ModuleFile)
CmdArgs.push_back("-emit-module-interface");
else if (JA.getType() == types::TY_HeaderUnit)
CmdArgs.push_back("-emit-header-unit");
else
CmdArgs.push_back("-emit-pch");
} else if (isa<VerifyPCHJobAction>(JA)) {
CmdArgs.push_back("-verify-pch");
} else if (isa<ExtractAPIJobAction>(JA)) {
assert(JA.getType() == types::TY_API_INFO &&
"Extract API actions must generate a API information.");
CmdArgs.push_back("-extract-api");
if (Arg *ProductNameArg = Args.getLastArg(options::OPT_product_name_EQ))
ProductNameArg->render(Args, CmdArgs);
if (Arg *ExtractAPIIgnoresFileArg =
Args.getLastArg(options::OPT_extract_api_ignores_EQ))
ExtractAPIIgnoresFileArg->render(Args, CmdArgs);
} else {
assert((isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) &&
"Invalid action for clang tool.");
if (JA.getType() == types::TY_Nothing) {
CmdArgs.push_back("-fsyntax-only");
} else if (JA.getType() == types::TY_LLVM_IR ||
JA.getType() == types::TY_LTO_IR) {
CmdArgs.push_back("-emit-llvm");
} else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
// Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
if (Triple.isAMDGCN() && IsOpenMPDevice && Args.hasArg(options::OPT_S) &&
Args.hasArg(options::OPT_emit_llvm)) {
CmdArgs.push_back("-emit-llvm");
} else {
CmdArgs.push_back("-emit-llvm-bc");
}
} else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
StringRef ArgStr =
Args.hasArg(options::OPT_interface_stub_version_EQ)
? Args.getLastArgValue(options::OPT_interface_stub_version_EQ)
: "ifs-v1";
CmdArgs.push_back("-emit-interface-stubs");
CmdArgs.push_back(
Args.MakeArgString(Twine("-interface-stub-version=") + ArgStr.str()));
} else if (JA.getType() == types::TY_PP_Asm) {
CmdArgs.push_back("-S");
} else if (JA.getType() == types::TY_AST) {
CmdArgs.push_back("-emit-pch");
} else if (JA.getType() == types::TY_ModuleFile) {
CmdArgs.push_back("-module-file-info");
} else if (JA.getType() == types::TY_RewrittenObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_NonFragile;
} else if (JA.getType() == types::TY_RewrittenLegacyObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_Fragile;
} else {
assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!");
}
// Preserve use-list order by default when emitting bitcode, so that
// loading the bitcode up in 'opt' or 'llc' and running passes gives the
// same result as running passes here. For LTO, we don't need to preserve
// the use-list order, since serialization to bitcode is part of the flow.
if (JA.getType() == types::TY_LLVM_BC)
CmdArgs.push_back("-emit-llvm-uselists");
if (IsUsingLTO) {
if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
!Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false) &&
!Triple.isAMDGPU()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_foffload_lto,
options::OPT_foffload_lto_EQ)
->getAsString(Args)
<< Triple.getTriple();
} else if (Triple.isNVPTX() && !IsRDCMode &&
JA.isDeviceOffloading(Action::OFK_Cuda)) {
D.Diag(diag::err_drv_unsupported_opt_for_language_mode)
<< Args.getLastArg(options::OPT_foffload_lto,
options::OPT_foffload_lto_EQ)
->getAsString(Args)
<< "-fno-gpu-rdc";
} else {
assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin);
CmdArgs.push_back(Args.MakeArgString(
Twine("-flto=") + (LTOMode == LTOK_Thin ? "thin" : "full")));
CmdArgs.push_back("-flto-unit");
}
}
}
if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) {
if (!types::isLLVMIR(Input.getType()))
D.Diag(diag::err_drv_arg_requires_bitcode_input) << A->getAsString(Args);
Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ);
}
if (Args.getLastArg(options::OPT_fthin_link_bitcode_EQ))
Args.AddLastArg(CmdArgs, options::OPT_fthin_link_bitcode_EQ);
if (Args.getLastArg(options::OPT_save_temps_EQ))
Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
auto *MemProfArg = Args.getLastArg(options::OPT_fmemory_profile,
options::OPT_fmemory_profile_EQ,
options::OPT_fno_memory_profile);
if (MemProfArg &&
!MemProfArg->getOption().matches(options::OPT_fno_memory_profile))
MemProfArg->render(Args, CmdArgs);
// Embed-bitcode option.
// Only white-listed flags below are allowed to be embedded.
if (C.getDriver().embedBitcodeInObject() && !IsUsingLTO &&
(isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) {
// Add flags implied by -fembed-bitcode.
Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
// Disable all llvm IR level optimizations.
CmdArgs.push_back("-disable-llvm-passes");
// Render target options.
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
// reject options that shouldn't be supported in bitcode
// also reject kernel/kext
static const constexpr unsigned kBitcodeOptionIgnorelist[] = {
options::OPT_mkernel,
options::OPT_fapple_kext,
options::OPT_ffunction_sections,
options::OPT_fno_function_sections,
options::OPT_fdata_sections,
options::OPT_fno_data_sections,
options::OPT_fbasic_block_sections_EQ,
options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names,
options::OPT_funique_section_names,
options::OPT_fno_unique_section_names,
options::OPT_funique_basic_block_section_names,
options::OPT_fno_unique_basic_block_section_names,
options::OPT_mrestrict_it,
options::OPT_mno_restrict_it,
options::OPT_mstackrealign,
options::OPT_mno_stackrealign,
options::OPT_mstack_alignment,
options::OPT_mcmodel_EQ,
options::OPT_mlong_calls,
options::OPT_mno_long_calls,
options::OPT_ggnu_pubnames,
options::OPT_gdwarf_aranges,
options::OPT_fdebug_types_section,
options::OPT_fno_debug_types_section,
options::OPT_fdwarf_directory_asm,
options::OPT_fno_dwarf_directory_asm,
options::OPT_mrelax_all,
options::OPT_mno_relax_all,
options::OPT_ftrap_function_EQ,
options::OPT_ffixed_r9,
options::OPT_mfix_cortex_a53_835769,
options::OPT_mno_fix_cortex_a53_835769,
options::OPT_ffixed_x18,
options::OPT_mglobal_merge,
options::OPT_mno_global_merge,
options::OPT_mred_zone,
options::OPT_mno_red_zone,
options::OPT_Wa_COMMA,
options::OPT_Xassembler,
options::OPT_mllvm,
};
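// Diagnose any of the ignorelisted options above when they appear together
// with -fembed-bitcode.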
for (const auto &A : Args)
if (llvm::is_contained(kBitcodeOptionIgnorelist, A->getOption().getID()))
D.Diag(diag::err_drv_unsupported_embed_bitcode) << A->getSpelling();
// Render the CodeGen options that need to be passed.
Args.addOptOutFlag(CmdArgs, options::OPT_foptimize_sibling_calls,
options::OPT_fno_optimize_sibling_calls);
RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
CmdArgs, JA);
// Render ABI arguments
switch (TC.getArch()) {
default: break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
RenderARMABI(D, Triple, Args, CmdArgs);
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
RenderAArch64ABI(Triple, Args, CmdArgs);
break;
}
// Optimization level for CodeGen.
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4)) {
CmdArgs.push_back("-O3");
D.Diag(diag::warn_O4_is_O3);
} else {
A->render(Args, CmdArgs);
}
}
// Input/Output file.
if (Output.getType() == types::TY_Dependencies) {
// Handled with other dependency code.
} else if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Input output.");
}
for (const auto &II : Inputs) {
addDashXForInput(Args, II, CmdArgs);
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else
II.getInputArg().renderAsInput(Args, CmdArgs);
}
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileUTF8(), D.getClangProgramPath(),
CmdArgs, Inputs, Output));
return;
}
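// Marker-only bitcode embedding just requests the placeholder section; it
// does not need the restrictions applied to full -fembed-bitcode above.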
if (C.getDriver().embedBitcodeMarkerOnly() && !IsUsingLTO)
CmdArgs.push_back("-fembed-bitcode=marker");
// We normally speed up the clang process a bit by skipping destructors at
// exit, but when we're generating diagnostics we can rely on some of the
// cleanup.
if (!C.isForDiagnostics())
CmdArgs.push_back("-disable-free");
CmdArgs.push_back("-clear-ast-before-backend");
#ifdef NDEBUG
const bool IsAssertBuild = false;
#else
const bool IsAssertBuild = true;
#endif
// Disable the verification pass in -asserts builds.
if (!IsAssertBuild)
CmdArgs.push_back("-disable-llvm-verifier");
// Discard value names in assert builds unless otherwise specified.
if (Args.hasFlag(options::OPT_fdiscard_value_names,
options::OPT_fno_discard_value_names, !IsAssertBuild)) {
if (Args.hasArg(options::OPT_fdiscard_value_names) &&
llvm::any_of(Inputs, [](const clang::driver::InputInfo &II) {
return types::isLLVMIR(II.getType());
})) {
D.Diag(diag::warn_ignoring_fdiscard_for_bitcode);
}
CmdArgs.push_back("-discard-value-names");
}
// Set the main file name, so that debug info works even with
// -save-temps.
CmdArgs.push_back("-main-file-name");
CmdArgs.push_back(getBaseInputName(Args, Input));
// Some flags which affect the language (via preprocessor
// defines).
if (Args.hasArg(options::OPT_static))
CmdArgs.push_back("-static-define");
if (Args.hasArg(options::OPT_municode))
CmdArgs.push_back("-DUNICODE");
if (isa<AnalyzeJobAction>(JA))
RenderAnalyzerOptions(Args, CmdArgs, Triple, Input);
if (isa<AnalyzeJobAction>(JA) ||
(isa<PreprocessJobAction>(JA) && Args.hasArg(options::OPT__analyze)))
CmdArgs.push_back("-setup-static-analyzer");
// Enable compatibility mode to avoid analyzer-config related errors.
// Since we can't access frontend flags through hasArg, let's manually iterate
// through them.
bool FoundAnalyzerConfig = false;
for (auto *Arg : Args.filtered(options::OPT_Xclang))
if (StringRef(Arg->getValue()) == "-analyzer-config") {
FoundAnalyzerConfig = true;
break;
}
if (!FoundAnalyzerConfig)
for (auto *Arg : Args.filtered(options::OPT_Xanalyzer))
if (StringRef(Arg->getValue()) == "-analyzer-config") {
FoundAnalyzerConfig = true;
break;
}
if (FoundAnalyzerConfig)
CmdArgs.push_back("-analyzer-config-compatibility-mode=true");
CheckCodeGenerationOptions(D, Args);
unsigned FunctionAlignment = ParseFunctionAlignment(TC, Args);
assert(FunctionAlignment <= 31 && "function alignment will be truncated!");
if (FunctionAlignment) {
CmdArgs.push_back("-function-alignment");
CmdArgs.push_back(Args.MakeArgString(std::to_string(FunctionAlignment)));
}
// We support -falign-loops=N where N is a power of 2. GCC supports more
// forms.
if (const Arg *A = Args.getLastArg(options::OPT_falign_loops_EQ)) {
unsigned Value = 0;
if (StringRef(A->getValue()).getAsInteger(10, Value) || Value > 65536)
TC.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
else if (Value & (Value - 1))
TC.getDriver().Diag(diag::err_drv_alignment_not_power_of_two)
<< A->getAsString(Args) << A->getValue();
// Treat =0 as unspecified (use the target preference).
if (Value)
CmdArgs.push_back(Args.MakeArgString("-falign-loops=" +
Twine(std::min(Value, 65536u))));
}
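// Work out the relocation model, PIC level, and PIE-ness for this
// compilation; several of the options below depend on them.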
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(TC, Args);
Arg *LastPICDataRelArg =
Args.getLastArg(options::OPT_mno_pic_data_is_text_relative,
options::OPT_mpic_data_is_text_relative);
bool NoPICDataIsTextRelative = false;
if (LastPICDataRelArg) {
if (LastPICDataRelArg->getOption().matches(
options::OPT_mno_pic_data_is_text_relative)) {
NoPICDataIsTextRelative = true;
if (!PICLevel)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< "-mno-pic-data-is-text-relative"
<< "-fpic/-fpie";
}
if (!Triple.isSystemZ())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< (NoPICDataIsTextRelative ? "-mno-pic-data-is-text-relative"
: "-mpic-data-is-text-relative")
<< RawTriple.str();
}
bool IsROPI = RelocationModel == llvm::Reloc::ROPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI;
bool IsRWPI = RelocationModel == llvm::Reloc::RWPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI;
if (Args.hasArg(options::OPT_mcmse) &&
!Args.hasArg(options::OPT_fallow_unsupported)) {
if (IsROPI)
D.Diag(diag::err_cmse_pi_are_incompatible) << IsROPI;
if (IsRWPI)
D.Diag(diag::err_cmse_pi_are_incompatible) << !IsRWPI;
}
if (IsROPI && types::isCXX(Input.getType()) &&
!Args.hasArg(options::OPT_fallow_unsupported))
D.Diag(diag::err_drv_ropi_incompatible_with_cxx);
const char *RMName = RelocationModelName(RelocationModel);
if (RMName) {
CmdArgs.push_back("-mrelocation-model");
CmdArgs.push_back(RMName);
}
if (PICLevel > 0) {
CmdArgs.push_back("-pic-level");
CmdArgs.push_back(PICLevel == 1 ? "1" : "2");
if (IsPIE)
CmdArgs.push_back("-pic-is-pie");
if (NoPICDataIsTextRelative)
CmdArgs.push_back("-mno-pic-data-is-text-relative");
}
if (RelocationModel == llvm::Reloc::ROPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI)
CmdArgs.push_back("-fropi");
if (RelocationModel == llvm::Reloc::RWPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI)
CmdArgs.push_back("-frwpi");
if (Arg *A = Args.getLastArg(options::OPT_meabi)) {
CmdArgs.push_back("-meabi");
CmdArgs.push_back(A->getValue());
}
// -fsemantic-interposition is forwarded to CC1: set the
// "SemanticInterposition" metadata to 1 (make some linkages interposable) and
// make default visibility external linkage definitions dso_preemptable.
//
// -fno-semantic-interposition: if the target supports .Lfoo$local local
// aliases (make default visibility external linkage definitions dso_local).
// This is the CC1 default for ELF to match COFF/Mach-O.
//
// Otherwise use Clang's traditional behavior: like
// -fno-semantic-interposition but local aliases are not used. So references
// can be interposed if not optimized out.
if (Triple.isOSBinFormatELF()) {
Arg *A = Args.getLastArg(options::OPT_fsemantic_interposition,
options::OPT_fno_semantic_interposition);
if (RelocationModel != llvm::Reloc::Static && !IsPIE) {
// The supported targets need to call AsmPrinter::getSymbolPreferLocal.
bool SupportsLocalAlias =
Triple.isAArch64() || Triple.isRISCV() || Triple.isX86();
if (!A)
CmdArgs.push_back("-fhalf-no-semantic-interposition");
else if (A->getOption().matches(options::OPT_fsemantic_interposition))
A->render(Args, CmdArgs);
else if (!SupportsLocalAlias)
CmdArgs.push_back("-fhalf-no-semantic-interposition");
}
}
{
std::string Model;
if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) {
if (!TC.isThreadModelSupported(A->getValue()))
D.Diag(diag::err_drv_invalid_thread_model_for_target)
<< A->getValue() << A->getAsString(Args);
Model = A->getValue();
} else
Model = TC.getThreadModel();
if (Model != "posix") {
CmdArgs.push_back("-mthread-model");
CmdArgs.push_back(Args.MakeArgString(Model));
}
}
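// -fveclib= selects a vector math library for the optimizer; diagnose
// combinations that are not supported on the current target architecture.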
if (Arg *A = Args.getLastArg(options::OPT_fveclib)) {
StringRef Name = A->getValue();
if (Name == "SVML") {
if (Triple.getArch() != llvm::Triple::x86 &&
Triple.getArch() != llvm::Triple::x86_64)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();
} else if (Name == "LIBMVEC-X86") {
if (Triple.getArch() != llvm::Triple::x86 &&
Triple.getArch() != llvm::Triple::x86_64)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();
} else if (Name == "SLEEF") {
if (Triple.getArch() != llvm::Triple::aarch64 &&
Triple.getArch() != llvm::Triple::aarch64_be)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();
}
A->render(Args, CmdArgs);
}
if (Args.hasFlag(options::OPT_fmerge_all_constants,
options::OPT_fno_merge_all_constants, false))
CmdArgs.push_back("-fmerge-all-constants");
Args.addOptOutFlag(CmdArgs, options::OPT_fdelete_null_pointer_checks,
options::OPT_fno_delete_null_pointer_checks);
// LLVM Code Generator Options.
for (const Arg *A : Args.filtered(options::OPT_frewrite_map_file_EQ)) {
StringRef Map = A->getValue();
if (!llvm::sys::fs::exists(Map)) {
D.Diag(diag::err_drv_no_such_file) << Map;
} else {
A->render(Args, CmdArgs);
A->claim();
}
}
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ_vec_extabi,
options::OPT_mabi_EQ_vec_default)) {
if (!Triple.isOSAIX())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
if (A->getOption().getID() == options::OPT_mabi_EQ_vec_extabi)
CmdArgs.push_back("-mabi=vec-extabi");
else
CmdArgs.push_back("-mabi=vec-default");
}
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ_quadword_atomics)) {
if (!Triple.isOSAIX() || Triple.isPPC32())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
CmdArgs.push_back("-mabi=quadword-atomics");
}
if (Arg *A = Args.getLastArg(options::OPT_mlong_double_128)) {
// Emit the unsupported option error until Clang's library integration
// support for 128-bit long double is available for AIX.
if (Triple.isOSAIX())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
}
if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) {
StringRef v = A->getValue();
// FIXME: Validate the argument here so we don't produce meaningless errors
// about -fwarn-stack-size=.
if (v.empty())
D.Diag(diag::err_drv_missing_argument) << A->getSpelling() << 1;
else
CmdArgs.push_back(Args.MakeArgString("-fwarn-stack-size=" + v));
A->claim();
}
Args.addOptOutFlag(CmdArgs, options::OPT_fjump_tables,
options::OPT_fno_jump_tables);
Args.addOptInFlag(CmdArgs, options::OPT_fprofile_sample_accurate,
options::OPT_fno_profile_sample_accurate);
Args.addOptOutFlag(CmdArgs, options::OPT_fpreserve_as_comments,
options::OPT_fno_preserve_as_comments);
if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
CmdArgs.push_back("-mregparm");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_maix_struct_return,
options::OPT_msvr4_struct_return)) {
if (!TC.getTriple().isPPC32()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
} else if (A->getOption().matches(options::OPT_maix_struct_return)) {
CmdArgs.push_back("-maix-struct-return");
} else {
assert(A->getOption().matches(options::OPT_msvr4_struct_return));
CmdArgs.push_back("-msvr4-struct-return");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return,
options::OPT_freg_struct_return)) {
if (TC.getArch() != llvm::Triple::x86) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
} else if (A->getOption().matches(options::OPT_fpcc_struct_return)) {
CmdArgs.push_back("-fpcc-struct-return");
} else {
assert(A->getOption().matches(options::OPT_freg_struct_return));
CmdArgs.push_back("-freg-struct-return");
}
}
if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false))
CmdArgs.push_back("-fdefault-calling-conv=stdcall");
if (Args.hasArg(options::OPT_fenable_matrix)) {
// enable-matrix is needed by both the LangOpts and by LLVM.
CmdArgs.push_back("-fenable-matrix");
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-matrix");
}
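// Translate the frame-pointer policy computed from the driver options into
// the corresponding -mframe-pointer= cc1 flag.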
CodeGenOptions::FramePointerKind FPKeepKind =
getFramePointerKind(Args, RawTriple);
const char *FPKeepKindStr = nullptr;
switch (FPKeepKind) {
case CodeGenOptions::FramePointerKind::None:
FPKeepKindStr = "-mframe-pointer=none";
break;
case CodeGenOptions::FramePointerKind::NonLeaf:
FPKeepKindStr = "-mframe-pointer=non-leaf";
break;
case CodeGenOptions::FramePointerKind::All:
FPKeepKindStr = "-mframe-pointer=all";
break;
}
assert(FPKeepKindStr && "unknown FramePointerKind");
CmdArgs.push_back(FPKeepKindStr);
Args.addOptOutFlag(CmdArgs, options::OPT_fzero_initialized_in_bss,
options::OPT_fno_zero_initialized_in_bss);
bool OFastEnabled = isOptimizationLevelFast(Args);
// If -Ofast is the optimization level, then -fstrict-aliasing should be
// enabled. This alias option is being used to simplify the hasFlag logic.
OptSpecifier StrictAliasingAliasOption =
OFastEnabled ? options::OPT_Ofast : options::OPT_fstrict_aliasing;
// We turn strict aliasing off by default if we're in CL mode, since MSVC
// doesn't do any TBAA.
bool TBAAOnByDefault = !D.IsCLMode();
if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption,
options::OPT_fno_strict_aliasing, TBAAOnByDefault))
CmdArgs.push_back("-relaxed-aliasing");
if (!Args.hasFlag(options::OPT_fstruct_path_tbaa,
options::OPT_fno_struct_path_tbaa, true))
CmdArgs.push_back("-no-struct-path-tbaa");
Args.addOptInFlag(CmdArgs, options::OPT_fstrict_enums,
options::OPT_fno_strict_enums);
Args.addOptOutFlag(CmdArgs, options::OPT_fstrict_return,
options::OPT_fno_strict_return);
Args.addOptInFlag(CmdArgs, options::OPT_fallow_editor_placeholders,
options::OPT_fno_allow_editor_placeholders);
Args.addOptInFlag(CmdArgs, options::OPT_fstrict_vtable_pointers,
options::OPT_fno_strict_vtable_pointers);
Args.addOptInFlag(CmdArgs, options::OPT_fforce_emit_vtables,
options::OPT_fno_force_emit_vtables);
Args.addOptOutFlag(CmdArgs, options::OPT_foptimize_sibling_calls,
options::OPT_fno_optimize_sibling_calls);
Args.addOptOutFlag(CmdArgs, options::OPT_fescaping_block_tail_calls,
options::OPT_fno_escaping_block_tail_calls);
Args.AddLastArg(CmdArgs, options::OPT_ffine_grained_bitfield_accesses,
options::OPT_fno_fine_grained_bitfield_accesses);
Args.AddLastArg(CmdArgs, options::OPT_fexperimental_relative_cxx_abi_vtables,
options::OPT_fno_experimental_relative_cxx_abi_vtables);
// Handle segmented stacks.
Args.addOptInFlag(CmdArgs, options::OPT_fsplit_stack,
options::OPT_fno_split_stack);
// -fprotect-parens=0 is default.
if (Args.hasFlag(options::OPT_fprotect_parens,
options::OPT_fno_protect_parens, false))
CmdArgs.push_back("-fprotect-parens");
RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) {
const llvm::Triple::ArchType Arch = TC.getArch();
if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
StringRef V = A->getValue();
if (V == "64")
CmdArgs.push_back("-fextend-arguments=64");
else if (V != "32")
D.Diag(diag::err_drv_invalid_argument_to_option)
<< A->getValue() << A->getOption().getName();
} else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getOption().getName() << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mdouble_EQ)) {
if (TC.getArch() == llvm::Triple::avr)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_LongDouble_Group)) {
if (TC.getTriple().isX86())
A->render(Args, CmdArgs);
else if (TC.getTriple().isPPC() &&
(A->getOption().getID() != options::OPT_mlong_double_80))
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
// Decide whether to use verbose asm. Verbose assembly is the default on
// toolchains which have the integrated assembler on by default.
bool IsIntegratedAssemblerDefault = TC.IsIntegratedAssemblerDefault();
if (!Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm,
IsIntegratedAssemblerDefault))
CmdArgs.push_back("-fno-verbose-asm");
// Parse 'none' or '$major.$minor'. Disallow -fbinutils-version=0 because we
// use that to indicate the MC default in the backend.
if (Arg *A = Args.getLastArg(options::OPT_fbinutils_version_EQ)) {
StringRef V = A->getValue();
unsigned Num;
if (V == "none")
A->render(Args, CmdArgs);
else if (!V.consumeInteger(10, Num) && Num > 0 &&
(V.empty() || (V.consume_front(".") &&
!V.consumeInteger(10, Num) && V.empty())))
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_invalid_argument_to_option)
<< A->getValue() << A->getOption().getName();
}
// If the toolchain chooses to use MCAsmParser for inline asm, don't pass the
// option to disable integrated-as explicitly.
if (!TC.useIntegratedAs() && !TC.parseInlineAsmUsingAsmParser())
CmdArgs.push_back("-no-integrated-as");
if (Args.hasArg(options::OPT_fdebug_pass_structure)) {
CmdArgs.push_back("-mdebug-pass");
CmdArgs.push_back("Structure");
}
if (Args.hasArg(options::OPT_fdebug_pass_arguments)) {
CmdArgs.push_back("-mdebug-pass");
CmdArgs.push_back("Arguments");
}
// Enable -mconstructor-aliases except on darwin, where we have to work around
// a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
// aliases aren't supported.
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
// try to use them.
if (KernelOrKext && RawTriple.isOSDarwin())
CmdArgs.push_back("-fforbid-guard-variables");
if (Args.hasFlag(options::OPT_mms_bitfields, options::OPT_mno_ms_bitfields,
Triple.isWindowsGNUEnvironment())) {
CmdArgs.push_back("-mms-bitfields");
}
// Non-PIC code defaults to -fdirect-access-external-data while PIC code
// defaults to -fno-direct-access-external-data. Pass the option if different
// from the default.
if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data,
options::OPT_fno_direct_access_external_data))
if (A->getOption().matches(options::OPT_fdirect_access_external_data) !=
(PICLevel == 0))
A->render(Args, CmdArgs);
if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) {
CmdArgs.push_back("-fno-plt");
}
// -fhosted is default.
// TODO: Audit uses of KernelOrKext and see where it'd be more appropriate to
// use Freestanding.
bool Freestanding =
Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) ||
KernelOrKext;
if (Freestanding)
CmdArgs.push_back("-ffreestanding");
Args.AddLastArg(CmdArgs, options::OPT_fno_knr_functions);
// This is a coarse approximation of what llvm-gcc actually does, both
// -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more
// complicated ways.
auto SanitizeArgs = TC.getSanitizerArgs(Args);
bool IsAsyncUnwindTablesDefault =
TC.getDefaultUnwindTableLevel(Args) == ToolChain::UnwindTableLevel::Asynchronous;
bool IsSyncUnwindTablesDefault =
TC.getDefaultUnwindTableLevel(Args) == ToolChain::UnwindTableLevel::Synchronous;
bool AsyncUnwindTables = Args.hasFlag(
options::OPT_fasynchronous_unwind_tables,
options::OPT_fno_asynchronous_unwind_tables,
(IsAsyncUnwindTablesDefault || SanitizeArgs.needsUnwindTables()) &&
!Freestanding);
bool UnwindTables =
Args.hasFlag(options::OPT_funwind_tables, options::OPT_fno_unwind_tables,
IsSyncUnwindTablesDefault && !Freestanding);
if (AsyncUnwindTables)
CmdArgs.push_back("-funwind-tables=2");
else if (UnwindTables)
CmdArgs.push_back("-funwind-tables=1");
// Prepare `-aux-target-cpu` and `-aux-target-feature` unless
// `--gpu-use-aux-triple-only` is specified.
if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
(IsCudaDevice || IsHIPDevice)) {
const ArgList &HostArgs =
C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
std::string HostCPU =
getCPUName(D, HostArgs, *TC.getAuxTriple(), /*FromAs*/ false);
if (!HostCPU.empty()) {
CmdArgs.push_back("-aux-target-cpu");
CmdArgs.push_back(Args.MakeArgString(HostCPU));
}
getTargetFeatures(D, *TC.getAuxTriple(), HostArgs, CmdArgs,
/*ForAS*/ false, /*IsAux*/ true);
}
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) {
StringRef CM = A->getValue();
if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" ||
CM == "tiny") {
if (Triple.isOSAIX() && CM == "medium")
CmdArgs.push_back("-mcmodel=large");
else if (Triple.isAArch64() && (CM == "kernel" || CM == "medium"))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< CM << A->getOption().getName();
else
A->render(Args, CmdArgs);
} else {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< CM << A->getOption().getName();
}
}
if (Arg *A = Args.getLastArg(options::OPT_mtls_size_EQ)) {
StringRef Value = A->getValue();
unsigned TLSSize = 0;
Value.getAsInteger(10, TLSSize);
if (!Triple.isAArch64() || !Triple.isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getOption().getName() << TripleStr;
if (TLSSize != 12 && TLSSize != 24 && TLSSize != 32 && TLSSize != 48)
D.Diag(diag::err_drv_invalid_int_value)
<< A->getOption().getName() << Value;
Args.AddLastArg(CmdArgs, options::OPT_mtls_size_EQ);
}
// Add the target cpu
std::string CPU = getCPUName(D, Args, Triple, /*FromAs*/ false);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
}
RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
// FIXME: For now we want to demote any errors to warnings, when they have
// been raised for asking the wrong question of scalable vectors, such as
// asking for the fixed number of elements. This may happen because code that
// is not yet ported to work for scalable vectors uses the wrong interfaces,
// whereas the behaviour is actually correct. Emitting a warning helps bring
// up scalable vector support in an incremental way. When scalable vector
// support is stable enough, all uses of wrong interfaces should be considered
// as errors, but until then, we can live with a warning being emitted by the
// compiler. This way, Clang can be used to compile code with scalable vectors
// and identify possible issues.
if (isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) ||
isa<BackendJobAction>(JA)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
}
// These two are potentially updated by AddClangCLArgs.
codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
bool EmitCodeView = false;
// Add clang-cl arguments.
types::ID InputType = Input.getType();
if (D.IsCLMode())
AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView);
DwarfFissionKind DwarfFission = DwarfFissionKind::None;
renderDebugOptions(TC, D, RawTriple, Args, EmitCodeView,
types::isLLVMIR(InputType), CmdArgs, DebugInfoKind,
DwarfFission);
// This controls whether or not we perform JustMyCode instrumentation.
if (Args.hasFlag(options::OPT_fjmc, options::OPT_fno_jmc, false)) {
if (TC.getTriple().isOSBinFormatELF()) {
if (DebugInfoKind >= codegenoptions::DebugInfoConstructor)
CmdArgs.push_back("-fjmc");
else
D.Diag(clang::diag::warn_drv_jmc_requires_debuginfo) << "-fjmc"
<< "-g";
} else {
D.Diag(clang::diag::warn_drv_fjmc_for_elf_only);
}
}
// Add the split debug info name to the command lines here so we
// can propagate it to the backend.
bool SplitDWARF = (DwarfFission != DwarfFissionKind::None) &&
(TC.getTriple().isOSBinFormatELF() ||
TC.getTriple().isOSBinFormatWasm()) &&
(isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) ||
isa<BackendJobAction>(JA));
if (SplitDWARF) {
const char *SplitDWARFOut = SplitDebugName(JA, Args, Input, Output);
CmdArgs.push_back("-split-dwarf-file");
CmdArgs.push_back(SplitDWARFOut);
if (DwarfFission == DwarfFissionKind::Split) {
CmdArgs.push_back("-split-dwarf-output");
CmdArgs.push_back(SplitDWARFOut);
}
}
// Pass the linker version in use.
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
CmdArgs.push_back("-target-linker-version");
CmdArgs.push_back(A->getValue());
}
// Explicitly error on some things we know we don't support and can't just
// ignore.
if (!Args.hasArg(options::OPT_fallow_unsupported)) {
Arg *Unsupported;
if (types::isCXX(InputType) && RawTriple.isOSDarwin() &&
TC.getArch() == llvm::Triple::x86) {
if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) ||
(Unsupported = Args.getLastArg(options::OPT_mkernel)))
D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386)
<< Unsupported->getOption().getName();
}
// The faltivec option has been superseded by the maltivec option.
if ((Unsupported = Args.getLastArg(options::OPT_faltivec)))
D.Diag(diag::err_drv_clang_unsupported_opt_faltivec)
<< Unsupported->getOption().getName()
<< "please use -maltivec and include altivec.h explicitly";
if ((Unsupported = Args.getLastArg(options::OPT_fno_altivec)))
D.Diag(diag::err_drv_clang_unsupported_opt_faltivec)
<< Unsupported->getOption().getName() << "please use -mno-altivec";
}
Args.AddAllArgs(CmdArgs, options::OPT_v);
if (Args.getLastArg(options::OPT_H)) {
CmdArgs.push_back("-H");
CmdArgs.push_back("-sys-header-deps");
}
Args.AddAllArgs(CmdArgs, options::OPT_fshow_skipped_includes);
if (D.CCPrintHeadersFormat && !D.CCGenDiagnostics) {
CmdArgs.push_back("-header-include-file");
CmdArgs.push_back(!D.CCPrintHeadersFilename.empty()
? D.CCPrintHeadersFilename.c_str()
: "-");
CmdArgs.push_back("-sys-header-deps");
CmdArgs.push_back(Args.MakeArgString(
"-header-include-format=" +
std::string(headerIncludeFormatKindToString(D.CCPrintHeadersFormat))));
CmdArgs.push_back(
Args.MakeArgString("-header-include-filtering=" +
std::string(headerIncludeFilteringKindToString(
D.CCPrintHeadersFiltering))));
}
Args.AddLastArg(CmdArgs, options::OPT_P);
Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout);
if (D.CCLogDiagnostics && !D.CCGenDiagnostics) {
CmdArgs.push_back("-diagnostic-log-file");
CmdArgs.push_back(!D.CCLogDiagnosticsFilename.empty()
? D.CCLogDiagnosticsFilename.c_str()
: "-");
}
// Give the gen diagnostics more chances to succeed, by avoiding intentional
// crashes.
if (D.CCGenDiagnostics)
CmdArgs.push_back("-disable-pragma-debug-crash");
// Allow backend to put its diagnostic files in the same place as frontend
// crash diagnostics files.
if (Args.hasArg(options::OPT_fcrash_diagnostics_dir)) {
StringRef Dir = Args.getLastArgValue(options::OPT_fcrash_diagnostics_dir);
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-crash-diagnostics-dir=" + Dir));
}
bool UseSeparateSections = isUseSeparateSections(Triple);
if (Args.hasFlag(options::OPT_ffunction_sections,
options::OPT_fno_function_sections, UseSeparateSections)) {
CmdArgs.push_back("-ffunction-sections");
}
if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_sections_EQ)) {
StringRef Val = A->getValue();
if (Triple.isX86() && Triple.isOSBinFormatELF()) {
if (Val != "all" && Val != "labels" && Val != "none" &&
!Val.startswith("list="))
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
else
A->render(Args, CmdArgs);
} else if (Triple.isNVPTX()) {
// Do not pass the option to the GPU compilation. We still want it enabled
// for the host-side compilation, so seeing it here is not an error.
} else if (Val != "none") {
// =none is allowed everywhere. It's useful for overriding the option
// and is the same as not specifying the option.
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
bool HasDefaultDataSections = Triple.isOSBinFormatXCOFF();
if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections,
UseSeparateSections || HasDefaultDataSections)) {
CmdArgs.push_back("-fdata-sections");
}
Args.addOptOutFlag(CmdArgs, options::OPT_funique_section_names,
options::OPT_fno_unique_section_names);
Args.addOptInFlag(CmdArgs, options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names);
Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
options::OPT_fno_unique_basic_block_section_names);
if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
// This codegen pass is only available on x86-elf targets.
if (Triple.isX86() && Triple.isOSBinFormatELF()) {
if (A->getOption().matches(options::OPT_fsplit_machine_functions))
A->render(Args, CmdArgs);
} else {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
Args.AddLastArg(CmdArgs, options::OPT_finstrument_functions,
options::OPT_finstrument_functions_after_inlining,
options::OPT_finstrument_function_entry_bare);
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
// for sampling, overhead of call arc collection is way too high and there's
// no way to collect the output.
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
addPGOAndCoverageFlags(TC, C, D, Output, Args, SanitizeArgs, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
if (getLastProfileSampleUseArg(Args) &&
Args.hasArg(options::OPT_fsample_profile_use_profi)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-sample-profile-use-profi");
}
// Add runtime flag for PS4/PS5 when PGO, coverage, or sanitizers are enabled.
if (RawTriple.isPS() &&
!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
PScpu::addProfileRTArgs(TC, Args, CmdArgs);
PScpu::addSanitizerArgs(TC, Args, CmdArgs);
}
// Pass options for controlling the default header search paths.
if (Args.hasArg(options::OPT_nostdinc)) {
CmdArgs.push_back("-nostdsysteminc");
CmdArgs.push_back("-nobuiltininc");
} else {
if (Args.hasArg(options::OPT_nostdlibinc))
CmdArgs.push_back("-nostdsysteminc");
Args.AddLastArg(CmdArgs, options::OPT_nostdincxx);
Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc);
}
// Pass the path to compiler resource files.
CmdArgs.push_back("-resource-dir");
CmdArgs.push_back(D.ResourceDir.c_str());
Args.AddLastArg(CmdArgs, options::OPT_working_directory);
RenderARCMigrateToolOptions(D, Args, CmdArgs);
// Add preprocessing options like -I, -D, etc. if we are using the
// preprocessor.
//
// FIXME: Support -fpreprocessed
if (types::getPreprocessedType(InputType) != types::TY_INVALID)
AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs);
// Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes
// that "The compiler can only warn and ignore the option if not recognized".
// When building with ccache, it will pass -D options to clang even on
// preprocessed inputs and configure concludes that -fPIC is not supported.
Args.ClaimAllArgs(options::OPT_D);
// Manually translate -O4 to -O3; let clang reject others.
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4)) {
CmdArgs.push_back("-O3");
D.Diag(diag::warn_O4_is_O3);
} else {
A->render(Args, CmdArgs);
}
}
// Warn about ignored options to clang.
for (const Arg *A :
Args.filtered(options::OPT_clang_ignored_gcc_optimization_f_Group)) {
D.Diag(diag::warn_ignored_gcc_optimization) << A->getAsString(Args);
A->claim();
}
for (const Arg *A :
Args.filtered(options::OPT_clang_ignored_legacy_options_Group)) {
D.Diag(diag::warn_ignored_clang_option) << A->getAsString(Args);
A->claim();
}
claimNoWarnArgs(Args);
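// Forward remark (-R) and warning (-W) options. MSVC-style /wd<number> flags
// are translated into the matching -Wno-<group> spellings.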
Args.AddAllArgs(CmdArgs, options::OPT_R_Group);
for (const Arg *A :
Args.filtered(options::OPT_W_Group, options::OPT__SLASH_wd)) {
A->claim();
if (A->getOption().getID() == options::OPT__SLASH_wd) {
unsigned WarningNumber;
if (StringRef(A->getValue()).getAsInteger(10, WarningNumber)) {
D.Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
continue;
}
if (auto Group = diagGroupFromCLWarningID(WarningNumber)) {
CmdArgs.push_back(Args.MakeArgString(
"-Wno-" + DiagnosticIDs::getWarningOptionForGroup(*Group)));
}
continue;
}
A->render(Args, CmdArgs);
}
if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false))
CmdArgs.push_back("-pedantic");
Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors);
Args.AddLastArg(CmdArgs, options::OPT_w);
Args.addOptInFlag(CmdArgs, options::OPT_ffixed_point,
options::OPT_fno_fixed_point);
if (Arg *A = Args.getLastArg(options::OPT_fcxx_abi_EQ))
A->render(Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fexperimental_relative_cxx_abi_vtables,
options::OPT_fno_experimental_relative_cxx_abi_vtables);
if (Arg *A = Args.getLastArg(options::OPT_ffuchsia_api_level_EQ))
A->render(Args, CmdArgs);
// Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi}
// (-ansi is equivalent to -std=c89 or -std=c++98).
//
// If a std is supplied, only add -trigraphs if it follows the
// option.
bool ImplyVCPPCVer = false;
bool ImplyVCPPCXXVer = false;
const Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi);
if (Std) {
if (Std->getOption().matches(options::OPT_ansi))
if (types::isCXX(InputType))
CmdArgs.push_back("-std=c++98");
else
CmdArgs.push_back("-std=c89");
else
Std->render(Args, CmdArgs);
// If -f(no-)trigraphs appears after the language standard flag, honor it.
if (Arg *A = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi,
options::OPT_ftrigraphs,
options::OPT_fno_trigraphs))
if (A != Std)
A->render(Args, CmdArgs);
} else {
// Honor -std-default.
//
// FIXME: Clang doesn't correctly handle -std= when the input language
// doesn't match. For the time being just ignore this for C++ inputs;
// eventually we want to do all the standard defaulting here instead of
// splitting it between the driver and clang -cc1.
if (!types::isCXX(InputType)) {
if (!Args.hasArg(options::OPT__SLASH_std)) {
Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ, "-std=",
/*Joined=*/true);
} else
ImplyVCPPCVer = true;
}
else if (IsWindowsMSVC)
ImplyVCPPCXXVer = true;
Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs,
options::OPT_fno_trigraphs);
}
// GCC's behavior for -Wwrite-strings is a bit strange:
// * In C, this "warning flag" changes the types of string literals from
// 'char[N]' to 'const char[N]', and thus triggers an unrelated warning
// for the discarded qualifier.
// * In C++, this is just a normal warning flag.
//
// Implementing this warning correctly in C is hard, so we follow GCC's
// behavior for now. FIXME: Directly diagnose uses of a string literal as
// a non-const char* in C, rather than using this crude hack.
if (!types::isCXX(InputType)) {
// FIXME: This should behave just like a warning flag, and thus should also
// respect -Weverything, -Wno-everything, -Werror=write-strings, and so on.
Arg *WriteStrings =
Args.getLastArg(options::OPT_Wwrite_strings,
options::OPT_Wno_write_strings, options::OPT_w);
if (WriteStrings &&
WriteStrings->getOption().matches(options::OPT_Wwrite_strings))
CmdArgs.push_back("-fconst-strings");
}
// GCC provides a macro definition '__DEPRECATED' when -Wdeprecated is active
// during C++ compilation, which it is by default. GCC keeps this define even
// in the presence of '-w', match this behavior bug-for-bug.
if (types::isCXX(InputType) &&
Args.hasFlag(options::OPT_Wdeprecated, options::OPT_Wno_deprecated,
true)) {
CmdArgs.push_back("-fdeprecated-macro");
}
// Translate GCC's misnamed '-fasm' arguments to '-fgnu-keywords'.
if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) {
if (Asm->getOption().matches(options::OPT_fasm))
CmdArgs.push_back("-fgnu-keywords");
else
CmdArgs.push_back("-fno-gnu-keywords");
}
if (!ShouldEnableAutolink(Args, TC, JA))
CmdArgs.push_back("-fno-autolink");
// Add in -fdebug-compilation-dir if necessary.
const char *DebugCompilationDir =
addDebugCompDirArg(Args, CmdArgs, D.getVFS());
addDebugPrefixMapArg(D, TC, Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_ftemplate_depth_,
options::OPT_ftemplate_depth_EQ)) {
CmdArgs.push_back("-ftemplate-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_foperator_arrow_depth_EQ)) {
CmdArgs.push_back("-foperator-arrow-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_depth_EQ)) {
CmdArgs.push_back("-fconstexpr-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_steps_EQ)) {
CmdArgs.push_back("-fconstexpr-steps");
CmdArgs.push_back(A->getValue());
}
Args.AddLastArg(CmdArgs, options::OPT_fexperimental_library);
if (Args.hasArg(options::OPT_fexperimental_new_constant_interpreter))
CmdArgs.push_back("-fexperimental-new-constant-interpreter");
if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) {
CmdArgs.push_back("-fbracket-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ,
options::OPT_Wlarge_by_value_copy_def)) {
if (A->getNumValues()) {
StringRef bytes = A->getValue();
CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes));
} else
CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value
}
if (Args.hasArg(options::OPT_relocatable_pch))
CmdArgs.push_back("-relocatable-pch");
if (const Arg *A = Args.getLastArg(options::OPT_fcf_runtime_abi_EQ)) {
static const char *kCFABIs[] = {
"standalone", "objc", "swift", "swift-5.0", "swift-4.2", "swift-4.1",
};
if (!llvm::is_contained(kCFABIs, StringRef(A->getValue())))
D.Diag(diag::err_drv_invalid_cf_runtime_abi) << A->getValue();
else
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_fconstant_string_class_EQ)) {
CmdArgs.push_back("-fconstant-string-class");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_ftabstop_EQ)) {
CmdArgs.push_back("-ftabstop");
CmdArgs.push_back(A->getValue());
}
Args.addOptInFlag(CmdArgs, options::OPT_fstack_size_section,
options::OPT_fno_stack_size_section);
if (Args.hasArg(options::OPT_fstack_usage)) {
CmdArgs.push_back("-stack-usage-file");
if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
SmallString<128> OutputFilename(OutputOpt->getValue());
llvm::sys::path::replace_extension(OutputFilename, "su");
CmdArgs.push_back(Args.MakeArgString(OutputFilename));
} else
CmdArgs.push_back(
Args.MakeArgString(Twine(getBaseInputStem(Args, Inputs)) + ".su"));
}
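// Always pass -ferror-limit to cc1; default to 19 errors when the user did
// not specify a value.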
CmdArgs.push_back("-ferror-limit");
if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ))
CmdArgs.push_back(A->getValue());
else
CmdArgs.push_back("19");
if (Arg *A = Args.getLastArg(options::OPT_fmacro_backtrace_limit_EQ)) {
CmdArgs.push_back("-fmacro-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_ftemplate_backtrace_limit_EQ)) {
CmdArgs.push_back("-ftemplate-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_backtrace_limit_EQ)) {
CmdArgs.push_back("-fconstexpr-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fspell_checking_limit_EQ)) {
CmdArgs.push_back("-fspell-checking-limit");
CmdArgs.push_back(A->getValue());
}
// Pass -fmessage-length=.
unsigned MessageLength = 0;
if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) {
StringRef V(A->getValue());
if (V.getAsInteger(0, MessageLength))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< V << A->getOption().getName();
} else {
// If -fmessage-length=N was not specified, determine whether this is a
// terminal and, if so, implicitly define -fmessage-length appropriately.
MessageLength = llvm::sys::Process::StandardErrColumns();
}
if (MessageLength != 0)
CmdArgs.push_back(
Args.MakeArgString("-fmessage-length=" + Twine(MessageLength)));
if (Arg *A = Args.getLastArg(options::OPT_frandomize_layout_seed_EQ))
CmdArgs.push_back(
Args.MakeArgString("-frandomize-layout-seed=" + Twine(A->getValue(0))));
if (Arg *A = Args.getLastArg(options::OPT_frandomize_layout_seed_file_EQ))
CmdArgs.push_back(Args.MakeArgString("-frandomize-layout-seed-file=" +
Twine(A->getValue(0))));
// -fvisibility= and -fvisibility-ms-compat are of a piece.
if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
if (A->getOption().matches(options::OPT_fvisibility_EQ)) {
A->render(Args, CmdArgs);
} else {
assert(A->getOption().matches(options::OPT_fvisibility_ms_compat));
CmdArgs.push_back("-fvisibility=hidden");
CmdArgs.push_back("-ftype-visibility=default");
}
} else if (IsOpenMPDevice) {
// When compiling for the OpenMP device we want protected visibility by
// default. This prevents the device from accidentally preempting code on
// the host, makes the system more robust, and improves performance.
CmdArgs.push_back("-fvisibility=protected");
}
// PS4/PS5 process these options in addClangTargetOptions.
if (!RawTriple.isPS()) {
if (const Arg *A =
Args.getLastArg(options::OPT_fvisibility_from_dllstorageclass,
options::OPT_fno_visibility_from_dllstorageclass)) {
if (A->getOption().matches(
options::OPT_fvisibility_from_dllstorageclass)) {
CmdArgs.push_back("-fvisibility-from-dllstorageclass");
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_dllexport_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_nodllstorageclass_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_externs_dllimport_EQ);
Args.AddLastArg(CmdArgs,
options::OPT_fvisibility_externs_nodllstorageclass_EQ);
}
}
}
if (const Arg *A = Args.getLastArg(options::OPT_mignore_xcoff_visibility)) {
if (Triple.isOSAIX())
CmdArgs.push_back("-mignore-xcoff-visibility");
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (const Arg *A =
Args.getLastArg(options::OPT_mdefault_visibility_export_mapping_EQ)) {
if (Triple.isOSAIX())
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Args.hasFlag(options::OPT_fvisibility_inlines_hidden,
options::OPT_fno_visibility_inlines_hidden, false))
CmdArgs.push_back("-fvisibility-inlines-hidden");
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var,
options::OPT_fno_visibility_inlines_hidden_static_local_var);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden);
Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ);
if (Args.hasFlag(options::OPT_fnew_infallible,
options::OPT_fno_new_infallible, false))
CmdArgs.push_back("-fnew-infallible");
if (Args.hasFlag(options::OPT_fno_operator_names,
options::OPT_foperator_names, false))
CmdArgs.push_back("-fno-operator-names");
// Forward -f (flag) options which we can pass directly.
Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls);
Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs);
Args.AddLastArg(CmdArgs, options::OPT_femulated_tls,
options::OPT_fno_emulated_tls);
Args.AddLastArg(CmdArgs, options::OPT_fzero_call_used_regs_EQ);
if (Arg *A = Args.getLastArg(options::OPT_fzero_call_used_regs_EQ)) {
// FIXME: There's no reason for this to be restricted to X86. The backend
// code needs to be changed to include the appropriate function calls
// automatically.
if (!Triple.isX86() && !Triple.isAArch64())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
// AltiVec-like language extensions aren't relevant for assembling.
if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm)
Args.AddLastArg(CmdArgs, options::OPT_fzvector);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree);
Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type);
// Forward flags for OpenMP. We don't do this if the current action is a
// device offloading action other than OpenMP.
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false) &&
(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_OpenMP))) {
switch (D.getOpenMPRuntime(Args)) {
case Driver::OMPRT_OMP:
case Driver::OMPRT_IOMP5:
// Clang can generate useful OpenMP code for these two runtime libraries.
CmdArgs.push_back("-fopenmp");
// If no option regarding the use of TLS in OpenMP code generation is
// given, decide a default based on the target. Otherwise rely on the
// options and pass the right information to the frontend.
if (!Args.hasFlag(options::OPT_fopenmp_use_tls,
options::OPT_fnoopenmp_use_tls, /*Default=*/true))
CmdArgs.push_back("-fnoopenmp-use-tls");
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
options::OPT_fno_openmp_simd);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_enable_irbuilder);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
if (!Args.hasFlag(options::OPT_fopenmp_extensions,
options::OPT_fno_openmp_extensions, /*Default=*/true))
CmdArgs.push_back("-fno-openmp-extensions");
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_number_of_sm_EQ);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_blocks_per_sm_EQ);
Args.AddAllArgs(CmdArgs,
options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ);
if (Args.hasFlag(options::OPT_fopenmp_optimistic_collapse,
options::OPT_fno_openmp_optimistic_collapse,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-optimistic-collapse");
// When in OpenMP offloading mode with an NVPTX target, forward the
// cuda-mode flag.
if (Args.hasFlag(options::OPT_fopenmp_cuda_mode,
options::OPT_fno_openmp_cuda_mode, /*Default=*/false))
CmdArgs.push_back("-fopenmp-cuda-mode");
// When in OpenMP offloading mode, enable debugging on the device.
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_target_debug_EQ);
if (Args.hasFlag(options::OPT_fopenmp_target_debug,
options::OPT_fno_openmp_target_debug, /*Default=*/false))
CmdArgs.push_back("-fopenmp-target-debug");
// When in OpenMP offloading mode, forward assumptions information about
// thread and team counts in the device.
if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription,
options::OPT_fno_openmp_assume_teams_oversubscription,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-assume-teams-oversubscription");
if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription,
options::OPT_fno_openmp_assume_threads_oversubscription,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
CmdArgs.push_back("-fopenmp-offload-mandatory");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
// for a specific runtime library, we just don't pass the '-fopenmp' flag
// down to the actual compilation.
// FIXME: It would be better to have a mode which *only* omits IR
// generation based on the OpenMP support so that we get consistent
// semantic analysis, etc.
break;
}
} else {
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
options::OPT_fno_openmp_simd);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
Args.addOptOutFlag(CmdArgs, options::OPT_fopenmp_extensions,
options::OPT_fno_openmp_extensions);
}
// Forward the new driver to change offloading code generation.
if (Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false))
CmdArgs.push_back("--offload-new-driver");
SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);
const XRayArgs &XRay = TC.getXRayArgs();
XRay.addArgs(TC, Args, CmdArgs, InputType);
for (const auto &Filename :
Args.getAllArgValues(options::OPT_fprofile_list_EQ)) {
if (D.getVFS().exists(Filename))
CmdArgs.push_back(Args.MakeArgString("-fprofile-list=" + Filename));
else
D.Diag(clang::diag::err_drv_no_such_file) << Filename;
}
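// Parse -fpatchable-function-entry=N[,M] (M defaults to 0 and must not exceed
// N) and split it into separate size and offset -cc1 flags; only AArch64,
// RISC-V, and X86 targets are supported.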
if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ)) {
StringRef S0 = A->getValue(), S = S0;
unsigned Size, Offset = 0;
if (!Triple.isAArch64() && !Triple.isRISCV() && !Triple.isX86())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
else if (S.consumeInteger(10, Size) ||
(!S.empty() && (!S.consume_front(",") ||
S.consumeInteger(10, Offset) || !S.empty())))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< S0 << A->getOption().getName();
else if (Size < Offset)
D.Diag(diag::err_drv_unsupported_fpatchable_function_entry_argument);
else {
CmdArgs.push_back(Args.MakeArgString(A->getSpelling() + Twine(Size)));
CmdArgs.push_back(Args.MakeArgString(
"-fpatchable-function-entry-offset=" + Twine(Offset)));
}
}
Args.AddLastArg(CmdArgs, options::OPT_fms_hotpatch);
if (TC.SupportsProfiling()) {
Args.AddLastArg(CmdArgs, options::OPT_pg);
llvm::Triple::ArchType Arch = TC.getArch();
if (Arg *A = Args.getLastArg(options::OPT_mfentry)) {
if (Arch == llvm::Triple::systemz || TC.getTriple().isX86())
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mnop_mcount)) {
if (Arch == llvm::Triple::systemz)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mrecord_mcount)) {
if (Arch == llvm::Triple::systemz)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
if (Arg *A = Args.getLastArgNoClaim(options::OPT_p)) {
if (!TC.getTriple().isOSAIX() && !TC.getTriple().isOSOpenBSD()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
if (Args.getLastArg(options::OPT_fapple_kext) ||
(Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType)))
CmdArgs.push_back("-fapple-kext");
Args.AddLastArg(CmdArgs, options::OPT_altivec_src_compat);
Args.AddLastArg(CmdArgs, options::OPT_flax_vector_conversions_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits);
Args.AddLastArg(CmdArgs, options::OPT_ftime_report);
Args.AddLastArg(CmdArgs, options::OPT_ftime_report_EQ);
Args.AddLastArg(CmdArgs, options::OPT_ftime_trace);
Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_granularity_EQ);
Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_EQ);
Args.AddLastArg(CmdArgs, options::OPT_ftrapv);
Args.AddLastArg(CmdArgs, options::OPT_malign_double);
Args.AddLastArg(CmdArgs, options::OPT_fno_temp_file);
if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) {
CmdArgs.push_back("-ftrapv-handler");
CmdArgs.push_back(A->getValue());
}
Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ);
// -fno-strict-overflow implies -fwrapv if it isn't disabled, but
// -fstrict-overflow won't turn off an explicitly enabled -fwrapv.
if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) {
if (A->getOption().matches(options::OPT_fwrapv))
CmdArgs.push_back("-fwrapv");
} else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow,
options::OPT_fno_strict_overflow)) {
if (A->getOption().matches(options::OPT_fno_strict_overflow))
CmdArgs.push_back("-fwrapv");
}
if (Arg *A = Args.getLastArg(options::OPT_freroll_loops,
options::OPT_fno_reroll_loops))
if (A->getOption().matches(options::OPT_freroll_loops))
CmdArgs.push_back("-freroll-loops");
Args.AddLastArg(CmdArgs, options::OPT_ffinite_loops,
options::OPT_fno_finite_loops);
Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings);
Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
options::OPT_fno_unroll_loops);
Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);
Args.AddLastArg(CmdArgs, options::OPT_pthread);
Args.addOptInFlag(CmdArgs, options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening);
RenderSSPOptions(D, TC, Args, CmdArgs, KernelOrKext);
RenderSCPOptions(TC, Args, CmdArgs);
RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fswift_async_fp_EQ);
Args.addOptInFlag(CmdArgs, options::OPT_mstackrealign,
options::OPT_mno_stackrealign);
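// Forward explicit stack alignment and stack probe size values to -cc1; an
// empty probe size is rendered as 0.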
if (Args.hasArg(options::OPT_mstack_alignment)) {
StringRef alignment = Args.getLastArgValue(options::OPT_mstack_alignment);
CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment));
}
if (Args.hasArg(options::OPT_mstack_probe_size)) {
StringRef Size = Args.getLastArgValue(options::OPT_mstack_probe_size);
if (!Size.empty())
CmdArgs.push_back(Args.MakeArgString("-mstack-probe-size=" + Size));
else
CmdArgs.push_back("-mstack-probe-size=0");
}
Args.addOptOutFlag(CmdArgs, options::OPT_mstack_arg_probe,
options::OPT_mno_stack_arg_probe);
if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
options::OPT_mno_restrict_it)) {
if (A->getOption().matches(options::OPT_mrestrict_it)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-restrict-it");
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-default-it");
}
}
// Forward -cl options to -cc1
RenderOpenCLOptions(Args, CmdArgs, InputType);
// Forward hlsl options to -cc1
RenderHLSLOptions(Args, CmdArgs, InputType);
if (IsHIP) {
if (Args.hasFlag(options::OPT_fhip_new_launch_api,
options::OPT_fno_hip_new_launch_api, true))
CmdArgs.push_back("-fhip-new-launch-api");
if (Args.hasFlag(options::OPT_fgpu_allow_device_init,
options::OPT_fno_gpu_allow_device_init, false))
CmdArgs.push_back("-fgpu-allow-device-init");
Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name,
options::OPT_fno_hip_kernel_arg_name);
}
if (IsCuda || IsHIP) {
if (IsRDCMode)
CmdArgs.push_back("-fgpu-rdc");
if (Args.hasFlag(options::OPT_fgpu_defer_diag,
options::OPT_fno_gpu_defer_diag, false))
CmdArgs.push_back("-fgpu-defer-diag");
if (Args.hasFlag(options::OPT_fgpu_exclude_wrong_side_overloads,
options::OPT_fno_gpu_exclude_wrong_side_overloads,
false)) {
CmdArgs.push_back("-fgpu-exclude-wrong-side-overloads");
CmdArgs.push_back("-fgpu-defer-diag");
}
}
// Forward -nogpulib to -cc1.
if (Args.hasArg(options::OPT_nogpulib))
CmdArgs.push_back("-nogpulib");
if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
}
if (Arg *A = Args.getLastArg(options::OPT_mfunction_return_EQ))
CmdArgs.push_back(
Args.MakeArgString(Twine("-mfunction-return=") + A->getValue()));
Args.AddLastArg(CmdArgs, options::OPT_mindirect_branch_cs_prefix);
// Forward -f options with positive and negative forms; we translate these by
// hand. Do not propagate PGO options to the GPU-side compilations as the
// profile info is for the host-side compilation only.
if (!(IsCudaDevice || IsHIPDevice)) {
if (Arg *A = getLastProfileSampleUseArg(Args)) {
auto *PGOArg = Args.getLastArg(
options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ, options::OPT_fprofile_use,
options::OPT_fprofile_use_EQ);
if (PGOArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "SampleUse with PGO options";
StringRef fname = A->getValue();
if (!llvm::sys::fs::exists(fname))
D.Diag(diag::err_drv_no_such_file) << fname;
else
A->render(Args, CmdArgs);
}
Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
options::OPT_fno_pseudo_probe_for_profiling, false)) {
CmdArgs.push_back("-fpseudo-probe-for-profiling");
// Enforce -funique-internal-linkage-names if it's not explicitly turned
// off.
if (Args.hasFlag(options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names, true))
CmdArgs.push_back("-funique-internal-linkage-names");
}
}
RenderBuiltinOptions(TC, RawTriple, Args, CmdArgs);
Args.addOptOutFlag(CmdArgs, options::OPT_fassume_sane_operator_new,
options::OPT_fno_assume_sane_operator_new);
// -fblocks=0 is default.
if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks,
TC.IsBlocksDefault()) ||
(Args.hasArg(options::OPT_fgnu_runtime) &&
Args.hasArg(options::OPT_fobjc_nonfragile_abi) &&
!Args.hasArg(options::OPT_fno_blocks))) {
CmdArgs.push_back("-fblocks");
if (!Args.hasArg(options::OPT_fgnu_runtime) && !TC.hasBlocksRuntime())
CmdArgs.push_back("-fblocks-runtime-optional");
}
// -fencode-extended-block-signature=1 is default.
if (TC.IsEncodeExtendedBlockSignatureDefault())
CmdArgs.push_back("-fencode-extended-block-signature");
if (Args.hasFlag(options::OPT_fcoroutines_ts, options::OPT_fno_coroutines_ts,
false) &&
types::isCXX(InputType)) {
D.Diag(diag::warn_deperecated_fcoroutines_ts_flag);
CmdArgs.push_back("-fcoroutines-ts");
}
if (Args.hasFlag(options::OPT_fcoro_aligned_allocation,
options::OPT_fno_coro_aligned_allocation, false) &&
types::isCXX(InputType))
CmdArgs.push_back("-fcoro-aligned-allocation");
Args.AddLastArg(CmdArgs, options::OPT_fdouble_square_bracket_attributes,
options::OPT_fno_double_square_bracket_attributes);
Args.addOptOutFlag(CmdArgs, options::OPT_faccess_control,
options::OPT_fno_access_control);
Args.addOptOutFlag(CmdArgs, options::OPT_felide_constructors,
options::OPT_fno_elide_constructors);
ToolChain::RTTIMode RTTIMode = TC.getRTTIMode();
if (KernelOrKext || (types::isCXX(InputType) &&
(RTTIMode == ToolChain::RM_Disabled)))
CmdArgs.push_back("-fno-rtti");
// -fshort-enums=0 is default for all architectures except Hexagon and z/OS.
if (Args.hasFlag(options::OPT_fshort_enums, options::OPT_fno_short_enums,
TC.getArch() == llvm::Triple::hexagon || Triple.isOSzOS()))
CmdArgs.push_back("-fshort-enums");
RenderCharacterOptions(Args, AuxTriple ? *AuxTriple : RawTriple, CmdArgs);
// -fuse-cxa-atexit is default.
if (!Args.hasFlag(
options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit,
!RawTriple.isOSAIX() && !RawTriple.isOSWindows() &&
((RawTriple.getVendor() != llvm::Triple::MipsTechnologies) ||
RawTriple.hasEnvironment())) ||
KernelOrKext)
CmdArgs.push_back("-fno-use-cxa-atexit");
if (Args.hasFlag(options::OPT_fregister_global_dtors_with_atexit,
options::OPT_fno_register_global_dtors_with_atexit,
RawTriple.isOSDarwin() && !KernelOrKext))
CmdArgs.push_back("-fregister-global-dtors-with-atexit");
Args.addOptInFlag(CmdArgs, options::OPT_fuse_line_directives,
options::OPT_fno_use_line_directives);
// -fno-minimize-whitespace is default.
if (Args.hasFlag(options::OPT_fminimize_whitespace,
options::OPT_fno_minimize_whitespace, false)) {
types::ID InputType = Inputs[0].getType();
if (!isDerivedFromC(InputType))
D.Diag(diag::err_drv_minws_unsupported_input_type)
<< types::getTypeName(InputType);
CmdArgs.push_back("-fminimize-whitespace");
}
// -fms-extensions=0 is default.
if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
IsWindowsMSVC))
CmdArgs.push_back("-fms-extensions");
// -fms-compatibility=0 is default.
bool IsMSVCCompat = Args.hasFlag(
options::OPT_fms_compatibility, options::OPT_fno_ms_compatibility,
(IsWindowsMSVC && Args.hasFlag(options::OPT_fms_extensions,
options::OPT_fno_ms_extensions, true)));
if (IsMSVCCompat)
CmdArgs.push_back("-fms-compatibility");
if (Triple.isWindowsMSVCEnvironment() && !D.IsCLMode() &&
Args.hasArg(options::OPT_fms_runtime_lib_EQ))
ProcessVSRuntimeLibrary(Args, CmdArgs);
// Handle -fgnuc-version, if present.
VersionTuple GNUCVer;
if (Arg *A = Args.getLastArg(options::OPT_fgnuc_version_EQ)) {
// Check that the version has 1 to 3 components and the minor and patch
// versions fit in two decimal digits.
StringRef Val = A->getValue();
Val = Val.empty() ? "0" : Val; // Treat "" as 0 or disable.
bool Invalid = GNUCVer.tryParse(Val);
unsigned Minor = GNUCVer.getMinor().value_or(0);
unsigned Patch = GNUCVer.getSubminor().value_or(0);
if (Invalid || GNUCVer.getBuild() || Minor >= 100 || Patch >= 100) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
} else if (!IsMSVCCompat) {
// Imitate GCC 4.2.1 by default if -fms-compatibility is not in effect.
GNUCVer = VersionTuple(4, 2, 1);
}
if (!GNUCVer.empty()) {
CmdArgs.push_back(
Args.MakeArgString("-fgnuc-version=" + GNUCVer.getAsString()));
}
VersionTuple MSVT = TC.computeMSVCVersion(&D, Args);
if (!MSVT.empty())
CmdArgs.push_back(
Args.MakeArgString("-fms-compatibility-version=" + MSVT.getAsString()));
bool IsMSVC2015Compatible = MSVT.getMajor() >= 19;
if (ImplyVCPPCVer) {
StringRef LanguageStandard;
if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) {
Std = StdArg;
LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue())
.Case("c11", "-std=c11")
.Case("c17", "-std=c17")
.Default("");
if (LanguageStandard.empty())
D.Diag(clang::diag::warn_drv_unused_argument)
<< StdArg->getAsString(Args);
}
CmdArgs.push_back(LanguageStandard.data());
}
if (ImplyVCPPCXXVer) {
StringRef LanguageStandard;
if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) {
Std = StdArg;
LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue())
.Case("c++14", "-std=c++14")
.Case("c++17", "-std=c++17")
.Case("c++20", "-std=c++20")
.Case("c++latest", "-std=c++2b")
.Default("");
if (LanguageStandard.empty())
D.Diag(clang::diag::warn_drv_unused_argument)
<< StdArg->getAsString(Args);
}
if (LanguageStandard.empty()) {
if (IsMSVC2015Compatible)
LanguageStandard = "-std=c++14";
else
LanguageStandard = "-std=c++11";
}
CmdArgs.push_back(LanguageStandard.data());
}
Args.addOptInFlag(CmdArgs, options::OPT_fborland_extensions,
options::OPT_fno_borland_extensions);
// -fno-declspec is default, except for PS4/PS5.
if (Args.hasFlag(options::OPT_fdeclspec, options::OPT_fno_declspec,
RawTriple.isPS()))
CmdArgs.push_back("-fdeclspec");
else if (Args.hasArg(options::OPT_fno_declspec))
CmdArgs.push_back("-fno-declspec"); // Explicitly disabling __declspec.
// -fthreadsafe-statics is default, except for MSVC compatibility versions
// less than 19.
if (!Args.hasFlag(options::OPT_fthreadsafe_statics,
options::OPT_fno_threadsafe_statics,
!types::isOpenCL(InputType) &&
(!IsWindowsMSVC || IsMSVC2015Compatible)))
CmdArgs.push_back("-fno-threadsafe-statics");
// -fno-delayed-template-parsing is default, except when targeting MSVC.
// Many old Windows SDK versions require this to parse.
// FIXME: MSVC introduced /Zc:twoPhase- to disable this behavior in their
// compiler. We should be able to disable this by default at some point.
if (Args.hasFlag(options::OPT_fdelayed_template_parsing,
options::OPT_fno_delayed_template_parsing, IsWindowsMSVC))
CmdArgs.push_back("-fdelayed-template-parsing");
// -fgnu-keywords default varies depending on language; only pass if
// specified.
Args.AddLastArg(CmdArgs, options::OPT_fgnu_keywords,
options::OPT_fno_gnu_keywords);
Args.addOptInFlag(CmdArgs, options::OPT_fgnu89_inline,
options::OPT_fno_gnu89_inline);
const Arg *InlineArg = Args.getLastArg(options::OPT_finline_functions,
options::OPT_finline_hint_functions,
options::OPT_fno_inline_functions);
if (Arg *A = Args.getLastArg(options::OPT_finline, options::OPT_fno_inline)) {
if (A->getOption().matches(options::OPT_fno_inline))
A->render(Args, CmdArgs);
} else if (InlineArg) {
InlineArg->render(Args, CmdArgs);
}
Args.AddLastArg(CmdArgs, options::OPT_finline_max_stacksize_EQ);
bool HaveModules =
RenderModulesOptions(C, D, Args, Input, Output, Std, CmdArgs);
if (Args.hasFlag(options::OPT_fpch_validate_input_files_content,
options::OPT_fno_pch_validate_input_files_content, false))
CmdArgs.push_back("-fvalidate-ast-input-files-content");
if (Args.hasFlag(options::OPT_fpch_instantiate_templates,
options::OPT_fno_pch_instantiate_templates, false))
CmdArgs.push_back("-fpch-instantiate-templates");
if (Args.hasFlag(options::OPT_fpch_codegen, options::OPT_fno_pch_codegen,
false))
CmdArgs.push_back("-fmodules-codegen");
if (Args.hasFlag(options::OPT_fpch_debuginfo, options::OPT_fno_pch_debuginfo,
false))
CmdArgs.push_back("-fmodules-debuginfo");
ObjCRuntime Runtime = AddObjCRuntimeArgs(Args, Inputs, CmdArgs, rewriteKind);
RenderObjCOptions(TC, D, RawTriple, Args, Runtime, rewriteKind != RK_None,
Input, CmdArgs);
if (types::isObjC(Input.getType()) &&
Args.hasFlag(options::OPT_fobjc_encode_cxx_class_template_spec,
options::OPT_fno_objc_encode_cxx_class_template_spec,
!Runtime.isNeXTFamily()))
CmdArgs.push_back("-fobjc-encode-cxx-class-template-spec");
if (Args.hasFlag(options::OPT_fapplication_extension,
options::OPT_fno_application_extension, false))
CmdArgs.push_back("-fapplication-extension");
// Handle GCC-style exception args.
bool EH = false;
if (!C.getDriver().IsCLMode())
EH = addExceptionArgs(Args, InputType, TC, KernelOrKext, Runtime, CmdArgs);
// Handle exception personalities
Arg *A = Args.getLastArg(
options::OPT_fsjlj_exceptions, options::OPT_fseh_exceptions,
options::OPT_fdwarf_exceptions, options::OPT_fwasm_exceptions);
if (A) {
const Option &Opt = A->getOption();
if (Opt.matches(options::OPT_fsjlj_exceptions))
CmdArgs.push_back("-exception-model=sjlj");
if (Opt.matches(options::OPT_fseh_exceptions))
CmdArgs.push_back("-exception-model=seh");
if (Opt.matches(options::OPT_fdwarf_exceptions))
CmdArgs.push_back("-exception-model=dwarf");
if (Opt.matches(options::OPT_fwasm_exceptions))
CmdArgs.push_back("-exception-model=wasm");
} else {
switch (TC.GetExceptionModel(Args)) {
default:
break;
case llvm::ExceptionHandling::DwarfCFI:
CmdArgs.push_back("-exception-model=dwarf");
break;
case llvm::ExceptionHandling::SjLj:
CmdArgs.push_back("-exception-model=sjlj");
break;
case llvm::ExceptionHandling::WinEH:
CmdArgs.push_back("-exception-model=seh");
break;
}
}
// C++ "sane" operator new.
Args.addOptOutFlag(CmdArgs, options::OPT_fassume_sane_operator_new,
options::OPT_fno_assume_sane_operator_new);
// -frelaxed-template-template-args is off by default, as it is a severe
// breaking change until a corresponding change to template partial ordering
// is provided.
Args.addOptInFlag(CmdArgs, options::OPT_frelaxed_template_template_args,
options::OPT_fno_relaxed_template_template_args);
// -fsized-deallocation is off by default, as it is an ABI-breaking change for
// most platforms.
Args.addOptInFlag(CmdArgs, options::OPT_fsized_deallocation,
options::OPT_fno_sized_deallocation);
// -faligned-allocation is on by default in C++17 onwards and otherwise off
// by default.
if (Arg *A = Args.getLastArg(options::OPT_faligned_allocation,
options::OPT_fno_aligned_allocation,
options::OPT_faligned_new_EQ)) {
if (A->getOption().matches(options::OPT_fno_aligned_allocation))
CmdArgs.push_back("-fno-aligned-allocation");
else
CmdArgs.push_back("-faligned-allocation");
}
// The default new alignment can be specified using a dedicated option or via
// a GCC-compatible option that also turns on aligned allocation.
if (Arg *A = Args.getLastArg(options::OPT_fnew_alignment_EQ,
options::OPT_faligned_new_EQ))
CmdArgs.push_back(
Args.MakeArgString(Twine("-fnew-alignment=") + A->getValue()));
// -fconstant-cfstrings is default, and may be subject to argument translation
// on Darwin.
if (!Args.hasFlag(options::OPT_fconstant_cfstrings,
options::OPT_fno_constant_cfstrings, true) ||
!Args.hasFlag(options::OPT_mconstant_cfstrings,
options::OPT_mno_constant_cfstrings, true))
CmdArgs.push_back("-fno-constant-cfstrings");
Args.addOptInFlag(CmdArgs, options::OPT_fpascal_strings,
options::OPT_fno_pascal_strings);
// Honor -fpack-struct= and -fpack-struct, if given. Note that
// -fno-pack-struct doesn't apply to -fpack-struct=.
if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) {
std::string PackStructStr = "-fpack-struct=";
PackStructStr += A->getValue();
CmdArgs.push_back(Args.MakeArgString(PackStructStr));
} else if (Args.hasFlag(options::OPT_fpack_struct,
options::OPT_fno_pack_struct, false)) {
CmdArgs.push_back("-fpack-struct=1");
}
// Handle -fmax-type-align=N and -fno-type-align
bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align);
if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) {
if (!SkipMaxTypeAlign) {
std::string MaxTypeAlignStr = "-fmax-type-align=";
MaxTypeAlignStr += A->getValue();
CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
}
} else if (RawTriple.isOSDarwin()) {
if (!SkipMaxTypeAlign) {
std::string MaxTypeAlignStr = "-fmax-type-align=16";
CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
}
}
if (!Args.hasFlag(options::OPT_Qy, options::OPT_Qn, true))
CmdArgs.push_back("-Qn");
// -fno-common is the default, set -fcommon only when that flag is set.
Args.addOptInFlag(CmdArgs, options::OPT_fcommon, options::OPT_fno_common);
// -fsigned-bitfields is default, and clang doesn't yet support
// -funsigned-bitfields.
if (!Args.hasFlag(options::OPT_fsigned_bitfields,
options::OPT_funsigned_bitfields, true))
D.Diag(diag::warn_drv_clang_unsupported)
<< Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args);
// -ffor-scope is default, and clang doesn't support -fno-for-scope.
if (!Args.hasFlag(options::OPT_ffor_scope, options::OPT_fno_for_scope, true))
D.Diag(diag::err_drv_clang_unsupported)
<< Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args);
// -finput-charset=UTF-8 is default; reject other values.
if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) {
StringRef value = inputCharset->getValue();
if (!value.equals_insensitive("utf-8"))
D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args)
<< value;
}
// -fexec-charset=UTF-8 is default; reject other values.
if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
StringRef value = execCharset->getValue();
if (!value.equals_insensitive("utf-8"))
D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
<< value;
}
RenderDiagnosticsOptions(D, Args, CmdArgs);
Args.addOptInFlag(CmdArgs, options::OPT_fasm_blocks,
options::OPT_fno_asm_blocks);
Args.addOptOutFlag(CmdArgs, options::OPT_fgnu_inline_asm,
options::OPT_fno_gnu_inline_asm);
// Enable vectorization per default according to the optimization level
// selected. For optimization levels that want vectorization we use the alias
// option to simplify the hasFlag logic.
bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false);
OptSpecifier VectorizeAliasOption =
EnableVec ? options::OPT_O_Group : options::OPT_fvectorize;
if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption,
options::OPT_fno_vectorize, EnableVec))
CmdArgs.push_back("-vectorize-loops");
// -fslp-vectorize is enabled based on the optimization level selected.
bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true);
OptSpecifier SLPVectAliasOption =
EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize;
if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption,
options::OPT_fno_slp_vectorize, EnableSLPVec))
CmdArgs.push_back("-vectorize-slp");
ParseMPreferVectorWidth(D, Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fshow_overloads_EQ);
Args.AddLastArg(CmdArgs,
options::OPT_fsanitize_undefined_strip_path_components_EQ);
// -fdollars-in-identifiers default varies depending on platform and
// language; only pass if specified.
if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers,
options::OPT_fno_dollars_in_identifiers)) {
if (A->getOption().matches(options::OPT_fdollars_in_identifiers))
CmdArgs.push_back("-fdollars-in-identifiers");
else
CmdArgs.push_back("-fno-dollars-in-identifiers");
}
Args.addOptInFlag(CmdArgs, options::OPT_fapple_pragma_pack,
options::OPT_fno_apple_pragma_pack);
if (Args.hasFlag(options::OPT_fxl_pragma_pack,
options::OPT_fno_xl_pragma_pack, RawTriple.isOSAIX()))
CmdArgs.push_back("-fxl-pragma-pack");
// Remarks can be enabled with any of the `-f.*optimization-record.*` flags.
if (willEmitRemarks(Args) && checkRemarksOptions(D, Args, Triple))
renderRemarksOptions(Args, CmdArgs, Triple, Input, Output, JA);
bool RewriteImports = Args.hasFlag(options::OPT_frewrite_imports,
options::OPT_fno_rewrite_imports, false);
if (RewriteImports)
CmdArgs.push_back("-frewrite-imports");
Args.addOptInFlag(CmdArgs, options::OPT_fdirectives_only,
options::OPT_fno_directives_only);
// Enable rewrite includes if the user's asked for it or if we're generating
// diagnostics.
// TODO: Once -module-dependency-dir works with -frewrite-includes it'd be
// nice to enable this when doing a crashdump for modules as well.
if (Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false) ||
(C.isForDiagnostics() && !HaveModules))
CmdArgs.push_back("-frewrite-includes");
// Only allow -traditional or -traditional-cpp in preprocessing modes.
if (Arg *A = Args.getLastArg(options::OPT_traditional,
options::OPT_traditional_cpp)) {
if (isa<PreprocessJobAction>(JA))
CmdArgs.push_back("-traditional-cpp");
else
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
Args.AddLastArg(CmdArgs, options::OPT_dM);
Args.AddLastArg(CmdArgs, options::OPT_dD);
Args.AddLastArg(CmdArgs, options::OPT_dI);
Args.AddLastArg(CmdArgs, options::OPT_fmax_tokens_EQ);
// Handle serialized diagnostics.
if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
CmdArgs.push_back("-serialize-diagnostic-file");
CmdArgs.push_back(Args.MakeArgString(A->getValue()));
}
if (Args.hasArg(options::OPT_fretain_comments_from_system_headers))
CmdArgs.push_back("-fretain-comments-from-system-headers");
// Forward -fcomment-block-commands to -cc1.
Args.AddAllArgs(CmdArgs, options::OPT_fcomment_block_commands);
// Forward -fparse-all-comments to -cc1.
Args.AddAllArgs(CmdArgs, options::OPT_fparse_all_comments);
// Turn -fplugin=name.so into -load name.so
for (const Arg *A : Args.filtered(options::OPT_fplugin_EQ)) {
CmdArgs.push_back("-load");
CmdArgs.push_back(A->getValue());
A->claim();
}
// Turn -fplugin-arg-pluginname-key=value into
// -plugin-arg-pluginname key=value
// GCC has an actual plugin_argument struct with key/value pairs that it
// passes to its plugins, but we don't, so just pass it on as-is.
//
// The syntax for -fplugin-arg- is ambiguous if both plugin name and
// argument key are allowed to contain dashes. GCC therefore only
// allows dashes in the key. We do the same.
for (const Arg *A : Args.filtered(options::OPT_fplugin_arg)) {
auto ArgValue = StringRef(A->getValue());
auto FirstDashIndex = ArgValue.find('-');
StringRef PluginName = ArgValue.substr(0, FirstDashIndex);
StringRef Arg = ArgValue.substr(FirstDashIndex + 1);
A->claim();
if (FirstDashIndex == StringRef::npos || Arg.empty()) {
if (PluginName.empty()) {
D.Diag(diag::warn_drv_missing_plugin_name) << A->getAsString(Args);
} else {
D.Diag(diag::warn_drv_missing_plugin_arg)
<< PluginName << A->getAsString(Args);
}
continue;
}
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-arg-") + PluginName));
CmdArgs.push_back(Args.MakeArgString(Arg));
}
// Forward -fpass-plugin=name.so to -cc1.
for (const Arg *A : Args.filtered(options::OPT_fpass_plugin_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fpass-plugin=") + A->getValue()));
A->claim();
}
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
if (!StatsFile.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-stats-file=") + StatsFile));
// Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option
// parser.
for (auto Arg : Args.filtered(options::OPT_Xclang)) {
Arg->claim();
// The -finclude-default-header flag is for the preprocessor; do not pass it
// to other cc1 commands when save-temps is enabled.
if (C.getDriver().isSaveTempsEnabled() &&
!isa<PreprocessJobAction>(JA)) {
if (StringRef(Arg->getValue()) == "-finclude-default-header")
continue;
}
CmdArgs.push_back(Arg->getValue());
}
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
A->claim();
// We translate this by hand to the -cc1 argument, since the nightly tests use
// it and developers have been trained to spell it with -mllvm. Both
// spellings are now deprecated and should be removed.
if (StringRef(A->getValue(0)) == "-disable-llvm-optzns") {
CmdArgs.push_back("-disable-llvm-optzns");
} else {
A->render(Args, CmdArgs);
}
}
// With -save-temps, we want to save the unoptimized bitcode output from the
// CompileJobAction, use -disable-llvm-passes to get pristine IR generated
// by the frontend.
// When -fembed-bitcode is enabled, optimized bitcode is emitted because it
// has slightly different breakdown between stages.
// FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of
// pristine IR generated by the frontend. Ideally, a new compile action should
// be added so both IR can be captured.
if ((C.getDriver().isSaveTempsEnabled() ||
JA.isHostOffloading(Action::OFK_OpenMP)) &&
!(C.getDriver().embedBitcodeInObject() && !IsUsingLTO) &&
isa<CompileJobAction>(JA))
CmdArgs.push_back("-disable-llvm-passes");
Args.AddAllArgs(CmdArgs, options::OPT_undef);
const char *Exec = D.getClangProgramPath();
// Optionally embed the -cc1 level arguments into the debug info or a
// section, for build analysis.
// Also record the command line arguments in the debug info if
// -grecord-gcc-switches is enabled.
// By default, -gno-record-gcc-switches is in effect and nothing is recorded.
auto GRecordSwitches =
Args.hasFlag(options::OPT_grecord_command_line,
options::OPT_gno_record_command_line, false);
auto FRecordSwitches =
Args.hasFlag(options::OPT_frecord_command_line,
options::OPT_fno_record_command_line, false);
if (FRecordSwitches && !Triple.isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_frecord_command_line)->getAsString(Args)
<< TripleStr;
if (TC.UseDwarfDebugFlags() || GRecordSwitches || FRecordSwitches) {
ArgStringList OriginalArgs;
for (const auto &Arg : Args)
Arg->render(Args, OriginalArgs);
SmallString<256> Flags;
EscapeSpacesAndBackslashes(Exec, Flags);
for (const char *OriginalArg : OriginalArgs) {
SmallString<128> EscapedArg;
EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
Flags += " ";
Flags += EscapedArg;
}
auto FlagsArgString = Args.MakeArgString(Flags);
if (TC.UseDwarfDebugFlags() || GRecordSwitches) {
CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(FlagsArgString);
}
if (FRecordSwitches) {
CmdArgs.push_back("-record-command-line");
CmdArgs.push_back(FlagsArgString);
}
}
// Host-side offloading compilation receives all device-side outputs. Include
// them in the host compilation depending on the target. If the host inputs
// are not empty we use the new-driver scheme, otherwise use the old scheme.
if ((IsCuda || IsHIP) && CudaDeviceInput) {
CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(CudaDeviceInput->getFilename());
} else if (!HostOffloadingInputs.empty()) {
if ((IsCuda || IsHIP) && !IsRDCMode) {
assert(HostOffloadingInputs.size() == 1 && "Only one input expected");
CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(HostOffloadingInputs.front().getFilename());
} else {
for (const InputInfo Input : HostOffloadingInputs)
CmdArgs.push_back(Args.MakeArgString("-fembed-offload-object=" +
TC.getInputFilename(Input)));
}
}
if (IsCuda) {
if (Args.hasFlag(options::OPT_fcuda_short_ptr,
options::OPT_fno_cuda_short_ptr, false))
CmdArgs.push_back("-fcuda-short-ptr");
}
if (IsCuda || IsHIP) {
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
auto CUID = cast<InputAction>(SourceAction)->getId();
if (!CUID.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-cuid=") + Twine(CUID)));
}
if (IsHIP) {
CmdArgs.push_back("-fcuda-allow-variadic-functions");
Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ);
}
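// For CUDA/HIP device-side compilations, lower -fgpu-inline-threshold= to the
// backend via -mllvm -inline-threshold=.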
if (IsCudaDevice || IsHIPDevice) {
StringRef InlineThresh =
Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ);
if (!InlineThresh.empty()) {
std::string ArgStr =
std::string("-inline-threshold=") + InlineThresh.str();
CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)});
}
}
// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
// to specify the result of the compile phase on the host, so the meaningful
// device declarations can be identified. Also, -fopenmp-is-device is passed
// along to tell the frontend that it is generating code for a device, so that
// only the relevant declarations are emitted.
if (IsOpenMPDevice) {
CmdArgs.push_back("-fopenmp-is-device");
if (OpenMPDeviceInput) {
CmdArgs.push_back("-fopenmp-host-ir-file-path");
CmdArgs.push_back(Args.MakeArgString(OpenMPDeviceInput->getFilename()));
}
}
if (Triple.isAMDGPU()) {
handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
Args.addOptInFlag(CmdArgs, options::OPT_munsafe_fp_atomics,
options::OPT_mno_unsafe_fp_atomics);
}
// For all the host OpenMP offloading compile jobs we need to pass the targets
// information using -fopenmp-targets= option.
if (JA.isHostOffloading(Action::OFK_OpenMP)) {
SmallString<128> Targets("-fopenmp-targets=");
SmallVector<std::string, 4> Triples;
auto TCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
std::transform(TCRange.first, TCRange.second, std::back_inserter(Triples),
[](auto TC) { return TC.second->getTripleString(); });
CmdArgs.push_back(Args.MakeArgString(Targets + llvm::join(Triples, ",")));
}
bool VirtualFunctionElimination =
Args.hasFlag(options::OPT_fvirtual_function_elimination,
options::OPT_fno_virtual_function_elimination, false);
if (VirtualFunctionElimination) {
// VFE requires full LTO (currently, this might be relaxed to allow ThinLTO
// in the future).
if (LTOMode != LTOK_Full)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< "-fvirtual-function-elimination"
<< "-flto=full";
CmdArgs.push_back("-fvirtual-function-elimination");
}
// VFE requires whole-program-vtables, and enables it by default.
bool WholeProgramVTables = Args.hasFlag(
options::OPT_fwhole_program_vtables,
options::OPT_fno_whole_program_vtables, VirtualFunctionElimination);
if (VirtualFunctionElimination && !WholeProgramVTables) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-fno-whole-program-vtables"
<< "-fvirtual-function-elimination";
}
if (WholeProgramVTables) {
// Propagate -fwhole-program-vtables if this is an LTO compile.
if (IsUsingLTO)
CmdArgs.push_back("-fwhole-program-vtables");
// Check if we passed LTO options but they were suppressed because this is a
// device offloading action, or we passed device offload LTO options which
// were suppressed because this is not the device offload action.
// Otherwise, issue an error.
else if (!D.isUsingLTO(!IsDeviceOffloadAction))
D.Diag(diag::err_drv_argument_only_allowed_with)
<< "-fwhole-program-vtables"
<< "-flto";
}
bool DefaultsSplitLTOUnit =
(WholeProgramVTables || SanitizeArgs.needsLTO()) &&
(LTOMode == LTOK_Full || TC.canSplitThinLTOUnit());
bool SplitLTOUnit =
Args.hasFlag(options::OPT_fsplit_lto_unit,
options::OPT_fno_split_lto_unit, DefaultsSplitLTOUnit);
if (SanitizeArgs.needsLTO() && !SplitLTOUnit)
D.Diag(diag::err_drv_argument_not_allowed_with) << "-fno-split-lto-unit"
<< "-fsanitize=cfi";
if (SplitLTOUnit)
CmdArgs.push_back("-fsplit-lto-unit");
if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel,
options::OPT_fno_global_isel)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_fglobal_isel)) {
CmdArgs.push_back("-global-isel=1");
// GISel is on by default on AArch64 -O0, so don't bother adding
// the fallback remarks for it. Other combinations will add a warning of
// some kind.
bool IsArchSupported = Triple.getArch() == llvm::Triple::aarch64;
bool IsOptLevelSupported = false;
Arg *A = Args.getLastArg(options::OPT_O_Group);
if (Triple.getArch() == llvm::Triple::aarch64) {
if (!A || A->getOption().matches(options::OPT_O0))
IsOptLevelSupported = true;
}
if (!IsArchSupported || !IsOptLevelSupported) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-global-isel-abort=2");
if (!IsArchSupported)
D.Diag(diag::warn_drv_global_isel_incomplete) << Triple.getArchName();
else
D.Diag(diag::warn_drv_global_isel_incomplete_opt);
}
} else {
CmdArgs.push_back("-global-isel=0");
}
}
if (Args.hasArg(options::OPT_forder_file_instrumentation)) {
CmdArgs.push_back("-forder-file-instrumentation");
// Enable order file instrumentation when ThinLTO is not on. When ThinLTO is
// on, we need to pass these flags as linker flags and that will be handled
// outside of the compiler.
if (!IsUsingLTO) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-order-file-instrumentation");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fforce_enable_int128,
options::OPT_fno_force_enable_int128)) {
if (A->getOption().matches(options::OPT_fforce_enable_int128))
CmdArgs.push_back("-fforce-enable-int128");
}
Args.addOptInFlag(CmdArgs, options::OPT_fkeep_static_consts,
options::OPT_fno_keep_static_consts);
Args.addOptInFlag(CmdArgs, options::OPT_fcomplete_member_pointers,
options::OPT_fno_complete_member_pointers);
Args.addOptOutFlag(CmdArgs, options::OPT_fcxx_static_destructors,
options::OPT_fno_cxx_static_destructors);
addMachineOutlinerArgs(D, Args, CmdArgs, Triple, /*IsLTO=*/false);
if (Arg *A = Args.getLastArg(options::OPT_moutline_atomics,
options::OPT_mno_outline_atomics)) {
// The -moutline-atomics option is supported for the AArch64 target only.
if (!Triple.isAArch64()) {
D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt)
<< Triple.getArchName() << A->getOption().getName();
} else {
if (A->getOption().matches(options::OPT_moutline_atomics)) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+outline-atomics");
} else {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-outline-atomics");
}
}
} else if (Triple.isAArch64() &&
getToolChain().IsAArch64OutlineAtomicsDefault(Args)) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+outline-atomics");
}
if (Triple.isAArch64() &&
(Args.hasArg(options::OPT_mno_fmv) ||
getToolChain().GetRuntimeLibType(Args) != ToolChain::RLT_CompilerRT)) {
// Disable Function Multiversioning on AArch64 target.
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-fmv");
}
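// Address-significance tables (-faddrsig) default to on for ELF and COFF
// targets using the integrated assembler, except on PS4, VE, NetBSD, Gentoo,
// and Android.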
if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
(TC.getTriple().isOSBinFormatELF() ||
TC.getTriple().isOSBinFormatCOFF()) &&
!TC.getTriple().isPS4() && !TC.getTriple().isVE() &&
!TC.getTriple().isOSNetBSD() &&
!Distro(D.getVFS(), TC.getTriple()).IsGentoo() &&
!TC.getTriple().isAndroid() && TC.useIntegratedAs()))
CmdArgs.push_back("-faddrsig");
if ((Triple.isOSBinFormatELF() || Triple.isOSBinFormatMachO()) &&
(EH || UnwindTables || AsyncUnwindTables ||
DebugInfoKind != codegenoptions::NoDebugInfo))
CmdArgs.push_back("-D__GCC_HAVE_DWARF2_CFI_ASM=1");
if (Arg *A = Args.getLastArg(options::OPT_fsymbol_partition_EQ)) {
std::string Str = A->getAsString(Args);
if (!TC.getTriple().isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Str << TC.getTripleString();
CmdArgs.push_back(Args.MakeArgString(Str));
}
// Add the output path to the object file for CodeView debug info.
if (EmitCodeView && Output.isFilename())
addDebugObjectName(Args, CmdArgs, DebugCompilationDir,
Output.getFilename());
// Add the "-o out -x type src.c" flags last. This is done primarily to make
// the -cc1 command easier to edit when reproducing compiler crashes.
if (Output.getType() == types::TY_Dependencies) {
// Handled with other dependency code.
} else if (Output.isFilename()) {
if (Output.getType() == clang::driver::types::TY_IFS_CPP ||
Output.getType() == clang::driver::types::TY_IFS) {
SmallString<128> OutputFilename(Output.getFilename());
llvm::sys::path::replace_extension(OutputFilename, "ifs");
CmdArgs.push_back("-o");
CmdArgs.push_back(Args.MakeArgString(OutputFilename));
} else {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
}
} else {
assert(Output.isNothing() && "Invalid output.");
}
addDashXForInput(Args, Input, CmdArgs);
ArrayRef<InputInfo> FrontendInputs = Input;
if (IsExtractAPI)
FrontendInputs = ExtractAPIInputs;
else if (Input.isNothing())
FrontendInputs = {};
for (const InputInfo &Input : FrontendInputs) {
if (Input.isFilename())
CmdArgs.push_back(Input.getFilename());
else
Input.getInputArg().renderAsInput(Args, CmdArgs);
}
if (D.CC1Main && !D.CCGenDiagnostics) {
// Invoke the CC1 directly in this process
C.addCommand(std::make_unique<CC1Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
} else {
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
}
// Make the compile command echo its inputs for /showFilenames.
if (Output.getType() == types::TY_Object &&
Args.hasFlag(options::OPT__SLASH_showFilenames,
options::OPT__SLASH_showFilenames_, false)) {
C.getJobs().getJobs().back()->PrintInputFilenames = true;
}
if (Arg *A = Args.getLastArg(options::OPT_pg))
if (FPKeepKind == CodeGenOptions::FramePointerKind::None &&
!Args.hasArg(options::OPT_mfentry))
D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer"
<< A->getAsString(Args);
// Claim some arguments which clang supports automatically.
// -fpch-preprocess is used with gcc to add a special marker in the output to
// include the PCH file.
Args.ClaimAllArgs(options::OPT_fpch_preprocess);
// Claim some arguments which clang doesn't support, but we don't
// care to warn the user about.
Args.ClaimAllArgs(options::OPT_clang_ignored_f_Group);
Args.ClaimAllArgs(options::OPT_clang_ignored_m_Group);
// Disable warnings for clang -E -emit-llvm foo.c
Args.ClaimAllArgs(options::OPT_emit_llvm);
}
Clang::Clang(const ToolChain &TC, bool HasIntegratedBackend)
// CAUTION! The first constructor argument ("clang") is not arbitrary,
// as it is for other tools. Some operations on a Tool actually test
// whether that tool is Clang based on the Tool's Name as a string.
: Tool("clang", "clang frontend", TC), HasBackend(HasIntegratedBackend) {}
Clang::~Clang() {}
/// Add options related to the Objective-C runtime/ABI.
///
/// Returns the selected Objective-C runtime.
ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
const InputInfoList &inputs,
ArgStringList &cmdArgs,
RewriteKind rewriteKind) const {
// Look for the controlling runtime option.
Arg *runtimeArg =
args.getLastArg(options::OPT_fnext_runtime, options::OPT_fgnu_runtime,
options::OPT_fobjc_runtime_EQ);
// Just forward -fobjc-runtime= to the frontend. This supersedes
// options about fragility.
if (runtimeArg &&
runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) {
ObjCRuntime runtime;
StringRef value = runtimeArg->getValue();
if (runtime.tryParse(value)) {
getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime)
<< value;
}
if ((runtime.getKind() == ObjCRuntime::GNUstep) &&
(runtime.getVersion() >= VersionTuple(2, 0)))
if (!getToolChain().getTriple().isOSBinFormatELF() &&
!getToolChain().getTriple().isOSBinFormatCOFF()) {
getToolChain().getDriver().Diag(
diag::err_drv_gnustep_objc_runtime_incompatible_binary)
<< runtime.getVersion().getMajor();
}
runtimeArg->render(args, cmdArgs);
return runtime;
}
// Otherwise, we'll need the ABI "version". Version numbers are
// slightly confusing for historical reasons:
// 1 - Traditional "fragile" ABI
// 2 - Non-fragile ABI, version 1
// 3 - Non-fragile ABI, version 2
unsigned objcABIVersion = 1;
// If -fobjc-abi-version= is present, use that to set the version.
if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) {
StringRef value = abiArg->getValue();
if (value == "1")
objcABIVersion = 1;
else if (value == "2")
objcABIVersion = 2;
else if (value == "3")
objcABIVersion = 3;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value;
} else {
// Otherwise, determine if we are using the non-fragile ABI.
bool nonFragileABIIsDefault =
(rewriteKind == RK_NonFragile ||
(rewriteKind == RK_None &&
getToolChain().IsObjCNonFragileABIDefault()));
if (args.hasFlag(options::OPT_fobjc_nonfragile_abi,
options::OPT_fno_objc_nonfragile_abi,
nonFragileABIIsDefault)) {
// Determine the non-fragile ABI version to use.
#ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO
unsigned nonFragileABIVersion = 1;
#else
unsigned nonFragileABIVersion = 2;
#endif
if (Arg *abiArg =
args.getLastArg(options::OPT_fobjc_nonfragile_abi_version_EQ)) {
StringRef value = abiArg->getValue();
if (value == "1")
nonFragileABIVersion = 1;
else if (value == "2")
nonFragileABIVersion = 2;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
<< value;
}
objcABIVersion = 1 + nonFragileABIVersion;
} else {
objcABIVersion = 1;
}
}
// We don't actually care about the ABI version other than whether
// it's non-fragile.
bool isNonFragile = objcABIVersion != 1;
// If we have no runtime argument, ask the toolchain for its default runtime.
// However, the rewriter only really supports the Mac runtime, so assume that.
ObjCRuntime runtime;
if (!runtimeArg) {
switch (rewriteKind) {
case RK_None:
runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
break;
case RK_Fragile:
runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple());
break;
case RK_NonFragile:
runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
break;
}
// -fnext-runtime
} else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) {
// On Darwin, make this use the default behavior for the toolchain.
if (getToolChain().getTriple().isOSDarwin()) {
runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
// Otherwise, build for a generic macosx port.
} else {
runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
}
// -fgnu-runtime
} else {
assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime));
// Legacy behaviour is to target the gnustep runtime if we are in
// non-fragile mode or the GCC runtime in fragile mode.
if (isNonFragile)
runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(2, 0));
else
runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple());
}
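// Forward the computed -fobjc-runtime= only when at least one input is
// Objective-C.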
if (llvm::any_of(inputs, [](const InputInfo &input) {
return types::isObjC(input.getType());
}))
cmdArgs.push_back(
args.MakeArgString("-fobjc-runtime=" + runtime.getAsString()));
return runtime;
}
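/// Returns true if the /EH modifier at EH[I] is enabled, i.e. not immediately
/// followed by '-'; consumes the trailing '-' when present.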
static bool maybeConsumeDash(const std::string &EH, size_t &I) {
bool HaveDash = (I + 1 < EH.size() && EH[I + 1] == '-');
I += HaveDash;
return !HaveDash;
}
namespace {
struct EHFlags {
bool Synch = false;
bool Asynch = false;
bool NoUnwindC = false;
};
} // end anonymous namespace
/// /EH controls whether to run destructor cleanups when exceptions are
/// thrown. There are three modifiers:
/// - s: Cleanup after "synchronous" exceptions, aka C++ exceptions.
/// - a: Cleanup after "asynchronous" exceptions, aka structured exceptions.
/// The 'a' modifier is unimplemented and fundamentally hard in LLVM IR.
/// - c: Assume that extern "C" functions are implicitly nounwind.
/// The default is /EHs-c-, meaning cleanups are disabled.
static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
EHFlags EH;
std::vector<std::string> EHArgs =
Args.getAllArgValues(options::OPT__SLASH_EH);
for (auto EHVal : EHArgs) {
for (size_t I = 0, E = EHVal.size(); I != E; ++I) {
switch (EHVal[I]) {
case 'a':
EH.Asynch = maybeConsumeDash(EHVal, I);
if (EH.Asynch)
EH.Synch = false;
continue;
case 'c':
EH.NoUnwindC = maybeConsumeDash(EHVal, I);
continue;
case 's':
EH.Synch = maybeConsumeDash(EHVal, I);
if (EH.Synch)
EH.Asynch = false;
continue;
default:
break;
}
D.Diag(clang::diag::err_drv_invalid_value) << "/EH" << EHVal;
break;
}
}
// The /GX and /GX- flags are only processed if there are no /EH flags.
// The default is that /GX is not specified.
if (EHArgs.empty() &&
Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_,
/*Default=*/false)) {
EH.Synch = true;
EH.NoUnwindC = true;
}
if (Args.hasArg(options::OPT__SLASH_kernel)) {
EH.Synch = false;
EH.NoUnwindC = false;
EH.Asynch = false;
}
return EH;
}
void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind *DebugInfoKind,
bool *EmitCodeView) const {
bool isNVPTX = getToolChain().getTriple().isNVPTX();
ProcessVSRuntimeLibrary(Args, CmdArgs);
if (Arg *ShowIncludes =
Args.getLastArg(options::OPT__SLASH_showIncludes,
options::OPT__SLASH_showIncludes_user)) {
CmdArgs.push_back("--show-includes");
if (ShowIncludes->getOption().matches(options::OPT__SLASH_showIncludes))
CmdArgs.push_back("-sys-header-deps");
}
// This controls whether or not we emit RTTI data for polymorphic types.
if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
/*Default=*/false))
CmdArgs.push_back("-fno-rtti-data");
// This controls whether or not we emit stack-protector instrumentation.
// In MSVC, Buffer Security Check (/GS) is on by default.
if (!isNVPTX && Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_,
/*Default=*/true)) {
CmdArgs.push_back("-stack-protector");
CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong)));
}
// Emit CodeView if -Z7 or -gline-tables-only are present.
if (Arg *DebugInfoArg = Args.getLastArg(options::OPT__SLASH_Z7,
options::OPT_gline_tables_only)) {
*EmitCodeView = true;
if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7))
*DebugInfoKind = codegenoptions::DebugInfoConstructor;
else
*DebugInfoKind = codegenoptions::DebugLineTablesOnly;
} else {
*EmitCodeView = false;
}
const Driver &D = getToolChain().getDriver();
// This controls whether or not we perform JustMyCode instrumentation.
if (Args.hasFlag(options::OPT__SLASH_JMC, options::OPT__SLASH_JMC_,
/*Default=*/false)) {
if (*EmitCodeView && *DebugInfoKind >= codegenoptions::DebugInfoConstructor)
CmdArgs.push_back("-fjmc");
else
D.Diag(clang::diag::warn_drv_jmc_requires_debuginfo) << "/JMC"
<< "'/Zi', '/Z7'";
}
EHFlags EH = parseClangCLEHFlags(D, Args);
if (!isNVPTX && (EH.Synch || EH.Asynch)) {
if (types::isCXX(InputType))
CmdArgs.push_back("-fcxx-exceptions");
CmdArgs.push_back("-fexceptions");
}
if (types::isCXX(InputType) && EH.Synch && EH.NoUnwindC)
CmdArgs.push_back("-fexternc-nounwind");
// /EP should expand to -E -P.
if (Args.hasArg(options::OPT__SLASH_EP)) {
CmdArgs.push_back("-E");
CmdArgs.push_back("-P");
}
unsigned VolatileOptionID;
if (getToolChain().getTriple().isX86())
VolatileOptionID = options::OPT__SLASH_volatile_ms;
else
VolatileOptionID = options::OPT__SLASH_volatile_iso;
if (Arg *A = Args.getLastArg(options::OPT__SLASH_volatile_Group))
VolatileOptionID = A->getOption().getID();
if (VolatileOptionID == options::OPT__SLASH_volatile_ms)
CmdArgs.push_back("-fms-volatile");
if (Args.hasFlag(options::OPT__SLASH_Zc_dllexportInlines_,
options::OPT__SLASH_Zc_dllexportInlines,
false)) {
CmdArgs.push_back("-fno-dllexport-inlines");
}
if (Args.hasFlag(options::OPT__SLASH_Zc_wchar_t_,
options::OPT__SLASH_Zc_wchar_t, false)) {
CmdArgs.push_back("-fno-wchar");
}
if (Args.hasArg(options::OPT__SLASH_kernel)) {
llvm::Triple::ArchType Arch = getToolChain().getArch();
std::vector<std::string> Values =
Args.getAllArgValues(options::OPT__SLASH_arch);
if (!Values.empty()) {
llvm::SmallSet<std::string, 4> SupportedArches;
if (Arch == llvm::Triple::x86)
SupportedArches.insert("IA32");
for (auto &V : Values)
if (!SupportedArches.contains(V))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< std::string("/arch:").append(V) << "/kernel";
}
CmdArgs.push_back("-fno-rtti");
if (Args.hasFlag(options::OPT__SLASH_GR, options::OPT__SLASH_GR_, false))
D.Diag(diag::err_drv_argument_not_allowed_with) << "/GR"
<< "/kernel";
}
Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg);
Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb);
if (MostGeneralArg && BestCaseArg)
D.Diag(clang::diag::err_drv_argument_not_allowed_with)
<< MostGeneralArg->getAsString(Args) << BestCaseArg->getAsString(Args);
if (MostGeneralArg) {
Arg *SingleArg = Args.getLastArg(options::OPT__SLASH_vms);
Arg *MultipleArg = Args.getLastArg(options::OPT__SLASH_vmm);
Arg *VirtualArg = Args.getLastArg(options::OPT__SLASH_vmv);
Arg *FirstConflict = SingleArg ? SingleArg : MultipleArg;
Arg *SecondConflict = VirtualArg ? VirtualArg : MultipleArg;
if (FirstConflict && SecondConflict && FirstConflict != SecondConflict)
D.Diag(clang::diag::err_drv_argument_not_allowed_with)
<< FirstConflict->getAsString(Args)
<< SecondConflict->getAsString(Args);
if (SingleArg)
CmdArgs.push_back("-fms-memptr-rep=single");
else if (MultipleArg)
CmdArgs.push_back("-fms-memptr-rep=multiple");
else
CmdArgs.push_back("-fms-memptr-rep=virtual");
}
// Parse the default calling convention options.
if (Arg *CCArg =
Args.getLastArg(options::OPT__SLASH_Gd, options::OPT__SLASH_Gr,
options::OPT__SLASH_Gz, options::OPT__SLASH_Gv,
options::OPT__SLASH_Gregcall)) {
unsigned DCCOptId = CCArg->getOption().getID();
const char *DCCFlag = nullptr;
bool ArchSupported = !isNVPTX;
llvm::Triple::ArchType Arch = getToolChain().getArch();
switch (DCCOptId) {
case options::OPT__SLASH_Gd:
DCCFlag = "-fdefault-calling-conv=cdecl";
break;
case options::OPT__SLASH_Gr:
ArchSupported = Arch == llvm::Triple::x86;
DCCFlag = "-fdefault-calling-conv=fastcall";
break;
case options::OPT__SLASH_Gz:
ArchSupported = Arch == llvm::Triple::x86;
DCCFlag = "-fdefault-calling-conv=stdcall";
break;
case options::OPT__SLASH_Gv:
ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
DCCFlag = "-fdefault-calling-conv=vectorcall";
break;
case options::OPT__SLASH_Gregcall:
ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
DCCFlag = "-fdefault-calling-conv=regcall";
break;
}
// MSVC doesn't warn if /Gr or /Gz is used on x64, so we don't either.
if (ArchSupported && DCCFlag)
CmdArgs.push_back(DCCFlag);
}
Args.AddLastArg(CmdArgs, options::OPT_vtordisp_mode_EQ);
if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) {
CmdArgs.push_back("-fdiagnostics-format");
CmdArgs.push_back("msvc");
}
if (Args.hasArg(options::OPT__SLASH_kernel))
CmdArgs.push_back("-fms-kernel");
for (const Arg *A : Args.filtered(options::OPT__SLASH_guard)) {
StringRef GuardArgs = A->getValue();
// The only valid options are "cf", "cf,nochecks", "cf-", "ehcont" and
// "ehcont-".
if (GuardArgs.equals_insensitive("cf")) {
// Emit CFG instrumentation and the table of address-taken functions.
CmdArgs.push_back("-cfguard");
} else if (GuardArgs.equals_insensitive("cf,nochecks")) {
// Emit only the table of address-taken functions.
CmdArgs.push_back("-cfguard-no-checks");
} else if (GuardArgs.equals_insensitive("ehcont")) {
// Emit EH continuation table.
CmdArgs.push_back("-ehcontguard");
} else if (GuardArgs.equals_insensitive("cf-") ||
GuardArgs.equals_insensitive("ehcont-")) {
// Do nothing, but we might want to emit a security warning in future.
} else {
D.Diag(diag::err_drv_invalid_value) << A->getSpelling() << GuardArgs;
}
+ A->claim();
}
}
const char *Clang::getBaseInputName(const ArgList &Args,
const InputInfo &Input) {
return Args.MakeArgString(llvm::sys::path::filename(Input.getBaseInput()));
}
const char *Clang::getBaseInputStem(const ArgList &Args,
const InputInfoList &Inputs) {
const char *Str = getBaseInputName(Args, Inputs[0]);
if (const char *End = strrchr(Str, '.'))
return Args.MakeArgString(std::string(Str, End));
return Str;
}
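// An illustrative note on the dependency-file naming below: with "-o bar.o"
// present, the output extension is replaced to give "bar.d"; without -o, the
// stem of the first input is used, e.g. "foo.c" -> "foo.d".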
const char *Clang::getDependencyFileName(const ArgList &Args,
const InputInfoList &Inputs) {
// FIXME: Think about this more.
if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
SmallString<128> OutputFilename(OutputOpt->getValue());
llvm::sys::path::replace_extension(OutputFilename, llvm::Twine('d'));
return Args.MakeArgString(OutputFilename);
}
return Args.MakeArgString(Twine(getBaseInputStem(Args, Inputs)) + ".d");
}
// Begin ClangAs
void ClangAs::AddMIPSTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
StringRef CPUName;
StringRef ABIName;
const llvm::Triple &Triple = getToolChain().getTriple();
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
}
void ClangAs::AddX86TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
addX86AlignBranchArgs(getToolChain().getDriver(), Args, CmdArgs,
/*IsLTO=*/false);
if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) {
StringRef Value = A->getValue();
if (Value == "intel" || Value == "att") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value));
} else {
getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Value;
}
}
}
void ClangAs::AddRISCVTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getTriple();
StringRef ABIName = riscv::getRISCVABI(Args, Triple);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
}
void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unexpected number of inputs.");
const InputInfo &Input = Inputs[0];
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
const std::string &TripleStr = Triple.getTriple();
const auto &D = getToolChain().getDriver();
// Don't warn about "clang -w -c foo.s"
Args.ClaimAllArgs(options::OPT_w);
// and "clang -emit-llvm -c foo.s"
Args.ClaimAllArgs(options::OPT_emit_llvm);
claimNoWarnArgs(Args);
// Invoke ourselves in -cc1as mode.
//
// FIXME: Implement custom jobs for internal actions.
CmdArgs.push_back("-cc1as");
// Add the "effective" target triple.
CmdArgs.push_back("-triple");
CmdArgs.push_back(Args.MakeArgString(TripleStr));
getToolChain().addClangCC1ASTargetOptions(Args, CmdArgs);
// Set the output mode; we currently only expect to be used as a real
// assembler.
CmdArgs.push_back("-filetype");
CmdArgs.push_back("obj");
// Set the main file name, so that debug info works even with
// -save-temps or preprocessed assembly.
CmdArgs.push_back("-main-file-name");
CmdArgs.push_back(Clang::getBaseInputName(Args, Input));
// Add the target cpu
std::string CPU = getCPUName(D, Args, Triple, /*FromAs*/ true);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
}
// Add the target features
getTargetFeatures(D, Triple, Args, CmdArgs, true);
// Ignore explicit -force_cpusubtype_ALL option.
(void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
// Pass along any -I options so we get proper .include search paths.
Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
// Determine the original source input.
auto FindSource = [](const Action *S) -> const Action * {
while (S->getKind() != Action::InputClass) {
assert(!S->getInputs().empty() && "unexpected root action!");
S = S->getInputs()[0];
}
return S;
};
const Action *SourceAction = FindSource(&JA);
// Forward -g and handle debug info related flags, assuming we are dealing
// with an actual assembly file.
bool WantDebug = false;
Args.ClaimAllArgs(options::OPT_g_Group);
if (Arg *A = Args.getLastArg(options::OPT_g_Group))
WantDebug = !A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_ggdb0);
codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
// Add the -fdebug-compilation-dir flag if needed.
const char *DebugCompilationDir =
addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS());
if (SourceAction->getType() == types::TY_Asm ||
SourceAction->getType() == types::TY_PP_Asm) {
// You might think that it would be OK to set DebugInfoKind outside of
// the guard for source type; however, there is a test which asserts
// that some assembler invocation receives no -debug-info-kind,
// and it's not clear whether that test is just overly restrictive.
DebugInfoKind = (WantDebug ? codegenoptions::DebugInfoConstructor
: codegenoptions::NoDebugInfo);
addDebugPrefixMapArg(getToolChain().getDriver(), getToolChain(), Args,
CmdArgs);
// Set the AT_producer to the clang version when using the integrated
// assembler on assembly source files.
CmdArgs.push_back("-dwarf-debug-producer");
CmdArgs.push_back(Args.MakeArgString(getClangFullVersion()));
// And pass along -I options
Args.AddAllArgs(CmdArgs, options::OPT_I);
}
const unsigned DwarfVersion = getDwarfVersion(getToolChain(), Args);
RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion,
llvm::DebuggerKind::Default);
renderDwarfFormat(D, Triple, Args, CmdArgs, DwarfVersion);
RenderDebugInfoCompressionArgs(Args, CmdArgs, D, getToolChain());
// Handle -fPIC et al -- the relocation-model affects the assembler
// for some targets.
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) =
ParsePICArgs(getToolChain(), Args);
const char *RMName = RelocationModelName(RelocationModel);
if (RMName) {
CmdArgs.push_back("-mrelocation-model");
CmdArgs.push_back(RMName);
}
// Optionally embed the -cc1as level arguments into the debug info, for build
// analysis.
if (getToolChain().UseDwarfDebugFlags()) {
ArgStringList OriginalArgs;
for (const auto &Arg : Args)
Arg->render(Args, OriginalArgs);
SmallString<256> Flags;
const char *Exec = getToolChain().getDriver().getClangProgramPath();
EscapeSpacesAndBackslashes(Exec, Flags);
for (const char *OriginalArg : OriginalArgs) {
SmallString<128> EscapedArg;
EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
Flags += " ";
Flags += EscapedArg;
}
CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(Args.MakeArgString(Flags));
}
// FIXME: Add -static support, once we have it.
// Add target specific flags.
switch (getToolChain().getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
AddMIPSTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
AddX86TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// This isn't in AddARMTargetArgs because we want to do this for assembly
// only, not C/C++.
if (Args.hasFlag(options::OPT_mdefault_build_attributes,
options::OPT_mno_default_build_attributes, true)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-add-build-attributes");
}
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
if (Args.hasArg(options::OPT_mmark_bti_property)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-aarch64-mark-bti-property");
}
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
AddRISCVTargetArgs(Args, CmdArgs);
break;
}
// Consume all the warning flags. Usually this would be handled more
// gracefully by -cc1 (warning about unknown warning flags, etc) but -cc1as
// doesn't handle that so rather than warning about unused flags that are
// actually used, we'll lie by omission instead.
// FIXME: Stop lying and consume only the appropriate driver flags
Args.ClaimAllArgs(options::OPT_W_Group);
CollectArgsForIntegratedAssembler(C, Args, CmdArgs,
getToolChain().getDriver());
Args.AddAllArgs(CmdArgs, options::OPT_mllvm);
if (DebugInfoKind > codegenoptions::NoDebugInfo && Output.isFilename())
addDebugObjectName(Args, CmdArgs, DebugCompilationDir,
Output.getFilename());
// Fixup any previous commands that use -object-file-name because when we
// generated them, the final .obj name wasn't yet known.
for (Command &J : C.getJobs()) {
if (SourceAction != FindSource(&J.getSource()))
continue;
auto &JArgs = J.getArguments();
for (unsigned I = 0; I < JArgs.size(); ++I) {
if (StringRef(JArgs[I]).startswith("-object-file-name=") &&
Output.isFilename()) {
ArgStringList NewArgs(JArgs.begin(), JArgs.begin() + I);
addDebugObjectName(Args, NewArgs, DebugCompilationDir,
Output.getFilename());
NewArgs.append(JArgs.begin() + I + 1, JArgs.end());
J.replaceArguments(NewArgs);
break;
}
}
}
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
const llvm::Triple &T = getToolChain().getTriple();
Arg *A;
if (getDebugFissionKind(D, Args, A) == DwarfFissionKind::Split &&
T.isOSBinFormatELF()) {
CmdArgs.push_back("-split-dwarf-output");
CmdArgs.push_back(SplitDebugName(JA, Args, Input, Output));
}
if (Triple.isAMDGPU())
handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs, /*IsCC1As=*/true);
assert(Input.isFilename() && "Invalid input.");
CmdArgs.push_back(Input.getFilename());
const char *Exec = getToolChain().getDriver().getClangProgramPath();
if (D.CC1Main && !D.CCGenDiagnostics) {
// Invoke cc1as directly in this process.
C.addCommand(std::make_unique<CC1Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
} else {
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
}
}
// Begin OffloadBundler
void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const {
// The version with only one output is expected to refer to a bundling job.
assert(isa<OffloadBundlingJobAction>(JA) && "Expecting bundling job!");
// The bundling command looks like this:
// clang-offload-bundler -type=bc
// -targets=host-triple,openmp-triple1,openmp-triple2
// -output=output_file
// -input=unbundle_file_host
// -input=unbundle_file_tgt1
// -input=unbundle_file_tgt2
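// As a concrete (illustrative) example, bundling a CUDA device object for
// sm_70 together with its host part could yield a -targets string such as:
//   -targets=host-x86_64-unknown-linux-gnu,cuda-nvptx64-nvidia-cuda-sm_70
// i.e. "<kind>-<normalized triple>[-<offload arch>]" per input, as built
// below.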
ArgStringList CmdArgs;
// Get the type.
CmdArgs.push_back(TCArgs.MakeArgString(
Twine("-type=") + types::getTypeTempSuffix(Output.getType())));
assert(JA.getInputs().size() == Inputs.size() &&
"Not have inputs for all dependence actions??");
// Get the targets.
SmallString<128> Triples;
Triples += "-targets=";
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
Triples += ',';
// Find ToolChain for this input.
Action::OffloadKind CurKind = Action::OFK_Host;
const ToolChain *CurTC = &getToolChain();
const Action *CurDep = JA.getInputs()[I];
if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
CurTC = nullptr;
OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
assert(CurTC == nullptr && "Expected one dependence!");
CurKind = A->getOffloadingDeviceKind();
CurTC = TC;
});
}
Triples += Action::GetOffloadKindName(CurKind);
Triples += '-';
Triples += CurTC->getTriple().normalize();
if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_Cuda) &&
!StringRef(CurDep->getOffloadingArch()).empty()) {
Triples += '-';
Triples += CurDep->getOffloadingArch();
}
// TODO: Replace parsing of -march flag. Can be done by storing GPUArch
// with each toolchain.
StringRef GPUArchName;
if (CurKind == Action::OFK_OpenMP) {
// Extract GPUArch from -march argument in TC argument list.
for (unsigned ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
auto ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
auto Arch = ArchStr.startswith_insensitive("-march=");
if (Arch) {
GPUArchName = ArchStr.substr(7);
Triples += "-";
break;
}
}
Triples += GPUArchName.str();
}
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
// Get bundled file command.
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-output=") + Output.getFilename()));
// Get unbundled files command.
for (unsigned I = 0; I < Inputs.size(); ++I) {
SmallString<128> UB;
UB += "-input=";
// Find ToolChain for this input.
const ToolChain *CurTC = &getToolChain();
if (const auto *OA = dyn_cast<OffloadAction>(JA.getInputs()[I])) {
CurTC = nullptr;
OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) {
assert(CurTC == nullptr && "Expected one dependence!");
CurTC = TC;
});
UB += C.addTempFile(
C.getArgs().MakeArgString(CurTC->getInputFilename(Inputs[I])));
} else {
UB += CurTC->getInputFilename(Inputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
}
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, std::nullopt, Output));
}
void OffloadBundler::ConstructJobMultipleOutputs(
Compilation &C, const JobAction &JA, const InputInfoList &Outputs,
const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const {
// The version with multiple outputs is expected to refer to an unbundling job.
auto &UA = cast<OffloadUnbundlingJobAction>(JA);
// The unbundling command looks like this:
// clang-offload-bundler -type=bc
// -targets=host-triple,openmp-triple1,openmp-triple2
// -input=input_file
// -output=unbundle_file_host
// -output=unbundle_file_tgt1
// -output=unbundle_file_tgt2
// -unbundle
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Expecting to unbundle a single file!");
InputInfo Input = Inputs.front();
// Get the type.
CmdArgs.push_back(TCArgs.MakeArgString(
Twine("-type=") + types::getTypeTempSuffix(Input.getType())));
// Get the targets.
SmallString<128> Triples;
Triples += "-targets=";
auto DepInfo = UA.getDependentActionsInfo();
for (unsigned I = 0; I < DepInfo.size(); ++I) {
if (I)
Triples += ',';
auto &Dep = DepInfo[I];
Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
Triples += '-';
Triples += Dep.DependentToolChain->getTriple().normalize();
if ((Dep.DependentOffloadKind == Action::OFK_HIP ||
Dep.DependentOffloadKind == Action::OFK_Cuda) &&
!Dep.DependentBoundArch.empty()) {
Triples += '-';
Triples += Dep.DependentBoundArch;
}
// TODO: Replace parsing of -march flag. Can be done by storing GPUArch
// with each toolchain.
StringRef GPUArchName;
if (Dep.DependentOffloadKind == Action::OFK_OpenMP) {
// Extract GPUArch from -march argument in TC argument list.
for (unsigned ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
StringRef ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
auto Arch = ArchStr.startswith_insensitive("-march=");
if (Arch) {
GPUArchName = ArchStr.substr(7);
Triples += "-";
break;
}
}
Triples += GPUArchName.str();
}
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
// Get bundled file command.
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-input=") + Input.getFilename()));
// Get unbundled files command.
for (unsigned I = 0; I < Outputs.size(); ++I) {
SmallString<128> UB;
UB += "-output=";
UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]);
CmdArgs.push_back(TCArgs.MakeArgString(UB));
}
CmdArgs.push_back("-unbundle");
CmdArgs.push_back("-allow-missing-bundles");
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, std::nullopt, Outputs));
}
void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
// Add the output file name.
assert(Output.isFilename() && "Invalid output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
// Create the inputs to bundle the needed metadata.
for (const InputInfo &Input : Inputs) {
const Action *OffloadAction = Input.getAction();
const ToolChain *TC = OffloadAction->getOffloadingToolChain();
const ArgList &TCArgs =
C.getArgsForToolChain(TC, OffloadAction->getOffloadingArch(),
OffloadAction->getOffloadingDeviceKind());
StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(Input));
StringRef Arch = (OffloadAction->getOffloadingArch())
? OffloadAction->getOffloadingArch()
: TCArgs.getLastArgValue(options::OPT_march_EQ);
StringRef Kind =
Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind());
ArgStringList Features;
SmallVector<StringRef> FeatureArgs;
getTargetFeatures(TC->getDriver(), TC->getTriple(), TCArgs, Features,
false);
llvm::copy_if(Features, std::back_inserter(FeatureArgs),
[](StringRef Arg) { return !Arg.startswith("-target"); });
SmallVector<std::string> Parts{
"file=" + File.str(),
"triple=" + TC->getTripleString(),
"arch=" + Arch.str(),
"kind=" + Kind.str(),
};
if (TC->getDriver().isUsingLTO(/* IsOffload */ true))
for (StringRef Feature : FeatureArgs)
Parts.emplace_back("feature=" + Feature.str());
CmdArgs.push_back(Args.MakeArgString("--image=" + llvm::join(Parts, ",")));
}
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
Args.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, Inputs, Output));
}
void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
const llvm::Triple TheTriple = getToolChain().getTriple();
ArgStringList CmdArgs;
// Pass the CUDA path to the linker wrapper tool.
for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
auto TCRange = C.getOffloadToolChains(Kind);
for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
const ToolChain *TC = I.second;
if (TC->getTriple().isNVPTX()) {
CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
if (CudaInstallation.isValid())
CmdArgs.push_back(Args.MakeArgString(
"--cuda-path=" + CudaInstallation.getInstallPath()));
break;
}
}
}
if (D.isUsingLTO(/* IsOffload */ true)) {
// Pass in the optimization level to use for LTO.
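// Mapping sketch (illustrative): -O4/-Ofast -> --opt-level=O3, -Og -> O1,
// -Os/-Oz -> O2, -O0 -> O0, and a plain -O<N> passes N through.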
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
StringRef OOpt;
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O)) {
OOpt = A->getValue();
if (OOpt == "g")
OOpt = "1";
else if (OOpt == "s" || OOpt == "z")
OOpt = "2";
} else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
if (!OOpt.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("--opt-level=O") + OOpt));
}
}
CmdArgs.push_back(
Args.MakeArgString("--host-triple=" + TheTriple.getTriple()));
if (Args.hasArg(options::OPT_v))
CmdArgs.push_back("--wrapper-verbose");
if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
if (!A->getOption().matches(options::OPT_g0))
CmdArgs.push_back("--device-debug");
}
for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
CmdArgs.push_back(Args.MakeArgString("--ptxas-arg=" + A));
// Forward remarks passes to the LLVM backend in the wrapper.
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("--offload-opt=-pass-remarks=") +
A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--offload-opt=-pass-remarks-missed=") + A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--offload-opt=-pass-remarks-analysis=") + A->getValue()));
if (Args.getLastArg(options::OPT_save_temps_EQ))
CmdArgs.push_back("--save-temps");
// Construct the link job so we can wrap around it.
Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput);
const auto &LinkCommand = C.getJobs().getJobs().back();
// Forward -Xoffload-linker<-triple> arguments to the device link job.
for (Arg *A : Args.filtered(options::OPT_Xoffload_linker)) {
StringRef Val = A->getValue(0);
if (Val.empty())
CmdArgs.push_back(
Args.MakeArgString(Twine("--device-linker=") + A->getValue(1)));
else
CmdArgs.push_back(Args.MakeArgString(
"--device-linker=" +
ToolChain::getOpenMPTriple(Val.drop_front()).getTriple() + "=" +
A->getValue(1)));
}
Args.ClaimAllArgs(options::OPT_Xoffload_linker);
// Embed bitcode instead of an object in JIT mode.
if (Args.hasFlag(options::OPT_fopenmp_target_jit,
options::OPT_fno_openmp_target_jit, false))
CmdArgs.push_back("--embed-bitcode");
// Forward `-mllvm` arguments to the LLVM invocations if present.
for (Arg *A : Args.filtered(options::OPT_mllvm)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(A->getValue());
A->claim();
}
// Add the linker arguments to be forwarded by the wrapper.
CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") +
LinkCommand->getExecutable()));
CmdArgs.push_back("--");
for (const char *LinkArg : LinkCommand->getArguments())
CmdArgs.push_back(LinkArg);
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper"));
// Replace the executable and arguments of the link job with the
// wrapper.
LinkCommand->replaceExecutable(Exec);
LinkCommand->replaceArguments(CmdArgs);
}
diff --git a/contrib/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp b/contrib/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 44034e44adec..3cc68673cd13 100644
--- a/contrib/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/contrib/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -1,217 +1,221 @@
//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
/// literal separators.
///
//===----------------------------------------------------------------------===//
#include "IntegerLiteralSeparatorFixer.h"
namespace clang {
namespace format {
enum class Base { Binary, Decimal, Hex, Other };
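// A small illustration of the classification below: getBase("0b101") is
// Base::Binary, getBase("0x1F") is Base::Hex, getBase("123") is
// Base::Decimal, and an octal literal such as "0777" falls into Base::Other.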
static Base getBase(const StringRef IntegerLiteral) {
assert(IntegerLiteral.size() > 1);
if (IntegerLiteral[0] > '0') {
assert(IntegerLiteral[0] <= '9');
return Base::Decimal;
}
assert(IntegerLiteral[0] == '0');
switch (IntegerLiteral[1]) {
case 'b':
case 'B':
return Base::Binary;
case 'x':
case 'X':
return Base::Hex;
default:
return Base::Other;
}
}
std::pair<tooling::Replacements, unsigned>
IntegerLiteralSeparatorFixer::process(const Environment &Env,
const FormatStyle &Style) {
switch (Style.Language) {
case FormatStyle::LK_Cpp:
case FormatStyle::LK_ObjC:
Separator = '\'';
break;
case FormatStyle::LK_CSharp:
case FormatStyle::LK_Java:
case FormatStyle::LK_JavaScript:
Separator = '_';
break;
default:
return {};
}
const auto &Option = Style.IntegerLiteralSeparator;
const auto Binary = Option.Binary;
const auto Decimal = Option.Decimal;
const auto Hex = Option.Hex;
const bool SkipBinary = Binary == 0;
const bool SkipDecimal = Decimal == 0;
const bool SkipHex = Hex == 0;
if (SkipBinary && SkipDecimal && SkipHex)
return {};
const auto BinaryMinDigits =
std::max((int)Option.BinaryMinDigits, Binary + 1);
const auto DecimalMinDigits =
std::max((int)Option.DecimalMinDigits, Decimal + 1);
const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
const auto &SourceMgr = Env.getSourceManager();
AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
const auto ID = Env.getFileID();
const auto LangOpts = getFormattingLangOpts(Style);
Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
Lex.SetCommentRetentionState(true);
Token Tok;
tooling::Replacements Result;
for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
auto Length = Tok.getLength();
if (Length < 2)
continue;
auto Location = Tok.getLocation();
auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
if (Tok.is(tok::comment)) {
if (Text == "// clang-format off" || Text == "/* clang-format off */")
Skip = true;
else if (Text == "// clang-format on" || Text == "/* clang-format on */")
Skip = false;
continue;
}
if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
!AffectedRangeMgr.affectsCharSourceRange(
CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
continue;
}
const auto B = getBase(Text);
const bool IsBase2 = B == Base::Binary;
const bool IsBase10 = B == Base::Decimal;
const bool IsBase16 = B == Base::Hex;
if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
(IsBase16 && SkipHex) || B == Base::Other) {
continue;
}
if (Style.isCpp()) {
- if (const auto Pos = Text.find_first_of("_i"); Pos != StringRef::npos) {
+ // Hex alpha digits a-f/A-F must be at the end of the string literal.
+ StringRef Suffixes = "_himnsuyd";
+ if (const auto Pos =
+ Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
+ Pos != StringRef::npos) {
Text = Text.substr(0, Pos);
Length = Pos;
}
}
if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
(IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
continue;
}
const auto Start = Text[0] == '0' ? 2 : 0;
auto End = Text.find_first_of("uUlLzZn", Start);
if (End == StringRef::npos)
End = Length;
if (Start > 0 || End < Length) {
Length = End - Start;
Text = Text.substr(Start, Length);
}
auto DigitsPerGroup = Decimal;
auto MinDigits = DecimalMinDigits;
if (IsBase2) {
DigitsPerGroup = Binary;
MinDigits = BinaryMinDigits;
} else if (IsBase16) {
DigitsPerGroup = Hex;
MinDigits = HexMinDigits;
}
const auto SeparatorCount = Text.count(Separator);
const int DigitCount = Length - SeparatorCount;
const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
if (RemoveSeparator && SeparatorCount == 0)
continue;
if (!RemoveSeparator && SeparatorCount > 0 &&
checkSeparator(Text, DigitsPerGroup)) {
continue;
}
const auto &Formatted =
format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
assert(Formatted != Text);
if (Start > 0)
Location = Location.getLocWithOffset(Start);
cantFail(Result.add(
tooling::Replacement(SourceMgr, Location, Length, Formatted)));
}
return {Result, 0};
}
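// Illustrative behaviour of checkSeparator() with Separator == '\'' and
// DigitsPerGroup == 3: "1'234'567" is already correctly grouped and returns
// true, whereas "12'34" returns false because the group to the right of the
// separator has only two digits.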
bool IntegerLiteralSeparatorFixer::checkSeparator(
const StringRef IntegerLiteral, int DigitsPerGroup) const {
assert(DigitsPerGroup > 0);
int I = 0;
for (auto C : llvm::reverse(IntegerLiteral)) {
if (C == Separator) {
if (I < DigitsPerGroup)
return false;
I = 0;
} else {
if (I == DigitsPerGroup)
return false;
++I;
}
}
return true;
}
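// Similarly, an illustrative call to format() with Separator == '\'' and
// DigitsPerGroup == 3: format("1234567", 3, 7, false) regroups the digits as
// "1'234'567", while RemoveSeparator == true simply strips any existing
// separators.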
std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
int DigitsPerGroup,
int DigitCount,
bool RemoveSeparator) const {
assert(DigitsPerGroup != 0);
std::string Formatted;
if (RemoveSeparator) {
for (auto C : IntegerLiteral)
if (C != Separator)
Formatted.push_back(C);
return Formatted;
}
int Remainder = DigitCount % DigitsPerGroup;
int I = 0;
for (auto C : IntegerLiteral) {
if (C == Separator)
continue;
if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
Formatted.push_back(Separator);
I = 0;
Remainder = 0;
}
Formatted.push_back(C);
++I;
}
return Formatted;
}
} // namespace format
} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaAccess.cpp b/contrib/llvm-project/clang/lib/Sema/SemaAccess.cpp
index cbda62497e6a..4a39c2d065e6 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaAccess.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaAccess.cpp
@@ -1,1986 +1,1987 @@
//===---- SemaAccess.cpp - C++ Access Control -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides Sema routines for C++ access control semantics.
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/Specifiers.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DependentDiagnostic.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Sema/DelayedDiagnostic.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
using namespace clang;
using namespace sema;
/// A copy of Sema's enum without AR_delayed.
enum AccessResult {
AR_accessible,
AR_inaccessible,
AR_dependent
};
/// SetMemberAccessSpecifier - Set the access specifier of a member.
/// Returns true on error (when the previous member decl access specifier
/// is different from the new member decl access specifier).
bool Sema::SetMemberAccessSpecifier(NamedDecl *MemberDecl,
NamedDecl *PrevMemberDecl,
AccessSpecifier LexicalAS) {
if (!PrevMemberDecl) {
// Use the lexical access specifier.
MemberDecl->setAccess(LexicalAS);
return false;
}
// C++ [class.access.spec]p3: When a member is redeclared its access
// specifier must be same as its initial declaration.
if (LexicalAS != AS_none && LexicalAS != PrevMemberDecl->getAccess()) {
Diag(MemberDecl->getLocation(),
diag::err_class_redeclared_with_different_access)
<< MemberDecl << LexicalAS;
Diag(PrevMemberDecl->getLocation(), diag::note_previous_access_declaration)
<< PrevMemberDecl << PrevMemberDecl->getAccess();
MemberDecl->setAccess(LexicalAS);
return true;
}
MemberDecl->setAccess(PrevMemberDecl->getAccess());
return false;
}
static CXXRecordDecl *FindDeclaringClass(NamedDecl *D) {
DeclContext *DC = D->getDeclContext();
// This can only happen at top: enum decls only "publish" their
// immediate members.
if (isa<EnumDecl>(DC))
DC = cast<EnumDecl>(DC)->getDeclContext();
CXXRecordDecl *DeclaringClass = cast<CXXRecordDecl>(DC);
while (DeclaringClass->isAnonymousStructOrUnion())
DeclaringClass = cast<CXXRecordDecl>(DeclaringClass->getDeclContext());
return DeclaringClass;
}
namespace {
struct EffectiveContext {
EffectiveContext() : Inner(nullptr), Dependent(false) {}
explicit EffectiveContext(DeclContext *DC)
: Inner(DC),
Dependent(DC->isDependentContext()) {
// An implicit deduction guide is semantically in the context enclosing the
// class template, but for access purposes behaves like the constructor
// from which it was produced.
if (auto *DGD = dyn_cast<CXXDeductionGuideDecl>(DC)) {
if (DGD->isImplicit()) {
DC = DGD->getCorrespondingConstructor();
if (!DC) {
// The copy deduction candidate doesn't have a corresponding
// constructor.
DC = cast<DeclContext>(DGD->getDeducedTemplate()->getTemplatedDecl());
}
}
}
// C++11 [class.access.nest]p1:
// A nested class is a member and as such has the same access
// rights as any other member.
// C++11 [class.access]p2:
// A member of a class can also access all the names to which
// the class has access. A local class of a member function
// may access the same names that the member function itself
// may access.
// This almost implies that the privileges of nesting are transitive.
// Technically it says nothing about the local classes of non-member
// functions (which can gain privileges through friendship), but we
// take that as an oversight.
while (true) {
// We want to add canonical declarations to the EC lists for
// simplicity of checking, but we need to walk up through the
// actual current DC chain. Otherwise, something like a local
// extern or friend which happens to be the canonical
// declaration will really mess us up.
if (isa<CXXRecordDecl>(DC)) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(DC);
Records.push_back(Record->getCanonicalDecl());
DC = Record->getDeclContext();
} else if (isa<FunctionDecl>(DC)) {
FunctionDecl *Function = cast<FunctionDecl>(DC);
Functions.push_back(Function->getCanonicalDecl());
if (Function->getFriendObjectKind())
DC = Function->getLexicalDeclContext();
else
DC = Function->getDeclContext();
} else if (DC->isFileContext()) {
break;
} else {
DC = DC->getParent();
}
}
}
bool isDependent() const { return Dependent; }
bool includesClass(const CXXRecordDecl *R) const {
R = R->getCanonicalDecl();
return llvm::is_contained(Records, R);
}
/// Retrieves the innermost "useful" context. Can be null if we're
/// doing access-control without privileges.
DeclContext *getInnerContext() const {
return Inner;
}
typedef SmallVectorImpl<CXXRecordDecl*>::const_iterator record_iterator;
DeclContext *Inner;
SmallVector<FunctionDecl*, 4> Functions;
SmallVector<CXXRecordDecl*, 4> Records;
bool Dependent;
};
/// Like sema::AccessedEntity, but kindly lets us scribble all over
/// it.
struct AccessTarget : public AccessedEntity {
AccessTarget(const AccessedEntity &Entity)
: AccessedEntity(Entity) {
initialize();
}
AccessTarget(ASTContext &Context,
MemberNonce _,
CXXRecordDecl *NamingClass,
DeclAccessPair FoundDecl,
QualType BaseObjectType)
: AccessedEntity(Context.getDiagAllocator(), Member, NamingClass,
FoundDecl, BaseObjectType) {
initialize();
}
AccessTarget(ASTContext &Context,
BaseNonce _,
CXXRecordDecl *BaseClass,
CXXRecordDecl *DerivedClass,
AccessSpecifier Access)
: AccessedEntity(Context.getDiagAllocator(), Base, BaseClass, DerivedClass,
Access) {
initialize();
}
bool isInstanceMember() const {
return (isMemberAccess() && getTargetDecl()->isCXXInstanceMember());
}
bool hasInstanceContext() const {
return HasInstanceContext;
}
class SavedInstanceContext {
public:
SavedInstanceContext(SavedInstanceContext &&S)
: Target(S.Target), Has(S.Has) {
S.Target = nullptr;
}
~SavedInstanceContext() {
if (Target)
Target->HasInstanceContext = Has;
}
private:
friend struct AccessTarget;
explicit SavedInstanceContext(AccessTarget &Target)
: Target(&Target), Has(Target.HasInstanceContext) {}
AccessTarget *Target;
bool Has;
};
SavedInstanceContext saveInstanceContext() {
return SavedInstanceContext(*this);
}
void suppressInstanceContext() {
HasInstanceContext = false;
}
const CXXRecordDecl *resolveInstanceContext(Sema &S) const {
assert(HasInstanceContext);
if (CalculatedInstanceContext)
return InstanceContext;
CalculatedInstanceContext = true;
DeclContext *IC = S.computeDeclContext(getBaseObjectType());
InstanceContext = (IC ? cast<CXXRecordDecl>(IC)->getCanonicalDecl()
: nullptr);
return InstanceContext;
}
const CXXRecordDecl *getDeclaringClass() const {
return DeclaringClass;
}
/// The "effective" naming class is the canonical non-anonymous
/// class containing the actual naming class.
const CXXRecordDecl *getEffectiveNamingClass() const {
const CXXRecordDecl *namingClass = getNamingClass();
while (namingClass->isAnonymousStructOrUnion())
namingClass = cast<CXXRecordDecl>(namingClass->getParent());
return namingClass->getCanonicalDecl();
}
private:
void initialize() {
HasInstanceContext = (isMemberAccess() &&
!getBaseObjectType().isNull() &&
getTargetDecl()->isCXXInstanceMember());
CalculatedInstanceContext = false;
InstanceContext = nullptr;
if (isMemberAccess())
DeclaringClass = FindDeclaringClass(getTargetDecl());
else
DeclaringClass = getBaseClass();
DeclaringClass = DeclaringClass->getCanonicalDecl();
}
bool HasInstanceContext : 1;
mutable bool CalculatedInstanceContext : 1;
mutable const CXXRecordDecl *InstanceContext;
const CXXRecordDecl *DeclaringClass;
};
}
/// Checks whether one class might instantiate to the other.
static bool MightInstantiateTo(const CXXRecordDecl *From,
const CXXRecordDecl *To) {
// Declaration names are always preserved by instantiation.
if (From->getDeclName() != To->getDeclName())
return false;
const DeclContext *FromDC = From->getDeclContext()->getPrimaryContext();
const DeclContext *ToDC = To->getDeclContext()->getPrimaryContext();
if (FromDC == ToDC) return true;
if (FromDC->isFileContext() || ToDC->isFileContext()) return false;
// Be conservative.
return true;
}
/// Checks whether one class is derived from another, inclusively.
/// Properly indicates when it couldn't be determined due to
/// dependence.
///
/// This should probably be donated to AST or at least Sema.
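/// For example (illustrative): given "struct A {}; struct B : A {};
/// struct C : B {};", IsDerivedFromInclusive(C, C) and
/// IsDerivedFromInclusive(C, A) are both AR_accessible, while a class whose
/// bases are still dependent may instead yield AR_dependent.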
static AccessResult IsDerivedFromInclusive(const CXXRecordDecl *Derived,
const CXXRecordDecl *Target) {
assert(Derived->getCanonicalDecl() == Derived);
assert(Target->getCanonicalDecl() == Target);
if (Derived == Target) return AR_accessible;
bool CheckDependent = Derived->isDependentContext();
if (CheckDependent && MightInstantiateTo(Derived, Target))
return AR_dependent;
AccessResult OnFailure = AR_inaccessible;
SmallVector<const CXXRecordDecl*, 8> Queue; // actually a stack
while (true) {
if (Derived->isDependentContext() && !Derived->hasDefinition() &&
!Derived->isLambda())
return AR_dependent;
for (const auto &I : Derived->bases()) {
const CXXRecordDecl *RD;
QualType T = I.getType();
if (const RecordType *RT = T->getAs<RecordType>()) {
RD = cast<CXXRecordDecl>(RT->getDecl());
} else if (const InjectedClassNameType *IT
= T->getAs<InjectedClassNameType>()) {
RD = IT->getDecl();
} else {
assert(T->isDependentType() && "non-dependent base wasn't a record?");
OnFailure = AR_dependent;
continue;
}
RD = RD->getCanonicalDecl();
if (RD == Target) return AR_accessible;
if (CheckDependent && MightInstantiateTo(RD, Target))
OnFailure = AR_dependent;
Queue.push_back(RD);
}
if (Queue.empty()) break;
Derived = Queue.pop_back_val();
}
return OnFailure;
}
static bool MightInstantiateTo(Sema &S, DeclContext *Context,
DeclContext *Friend) {
if (Friend == Context)
return true;
assert(!Friend->isDependentContext() &&
"can't handle friends with dependent contexts here");
if (!Context->isDependentContext())
return false;
if (Friend->isFileContext())
return false;
// TODO: this is very conservative
return true;
}
// Asks whether the type in 'context' can ever instantiate to the type
// in 'friend'.
static bool MightInstantiateTo(Sema &S, CanQualType Context, CanQualType Friend) {
if (Friend == Context)
return true;
if (!Friend->isDependentType() && !Context->isDependentType())
return false;
// TODO: this is very conservative.
return true;
}
static bool MightInstantiateTo(Sema &S,
FunctionDecl *Context,
FunctionDecl *Friend) {
if (Context->getDeclName() != Friend->getDeclName())
return false;
if (!MightInstantiateTo(S,
Context->getDeclContext(),
Friend->getDeclContext()))
return false;
CanQual<FunctionProtoType> FriendTy
= S.Context.getCanonicalType(Friend->getType())
->getAs<FunctionProtoType>();
CanQual<FunctionProtoType> ContextTy
= S.Context.getCanonicalType(Context->getType())
->getAs<FunctionProtoType>();
// There isn't any way that I know of to add qualifiers
// during instantiation.
if (FriendTy.getQualifiers() != ContextTy.getQualifiers())
return false;
if (FriendTy->getNumParams() != ContextTy->getNumParams())
return false;
if (!MightInstantiateTo(S, ContextTy->getReturnType(),
FriendTy->getReturnType()))
return false;
for (unsigned I = 0, E = FriendTy->getNumParams(); I != E; ++I)
if (!MightInstantiateTo(S, ContextTy->getParamType(I),
FriendTy->getParamType(I)))
return false;
return true;
}
static bool MightInstantiateTo(Sema &S,
FunctionTemplateDecl *Context,
FunctionTemplateDecl *Friend) {
return MightInstantiateTo(S,
Context->getTemplatedDecl(),
Friend->getTemplatedDecl());
}
static AccessResult MatchesFriend(Sema &S,
const EffectiveContext &EC,
const CXXRecordDecl *Friend) {
if (EC.includesClass(Friend))
return AR_accessible;
if (EC.isDependent()) {
for (const CXXRecordDecl *Context : EC.Records) {
if (MightInstantiateTo(Context, Friend))
return AR_dependent;
}
}
return AR_inaccessible;
}
static AccessResult MatchesFriend(Sema &S,
const EffectiveContext &EC,
CanQualType Friend) {
if (const RecordType *RT = Friend->getAs<RecordType>())
return MatchesFriend(S, EC, cast<CXXRecordDecl>(RT->getDecl()));
// TODO: we can do better than this
if (Friend->isDependentType())
return AR_dependent;
return AR_inaccessible;
}
/// Determines whether the given friend class template matches
/// anything in the effective context.
static AccessResult MatchesFriend(Sema &S,
const EffectiveContext &EC,
ClassTemplateDecl *Friend) {
AccessResult OnFailure = AR_inaccessible;
// Check whether the friend is the template of a class in the
// context chain.
for (SmallVectorImpl<CXXRecordDecl*>::const_iterator
I = EC.Records.begin(), E = EC.Records.end(); I != E; ++I) {
CXXRecordDecl *Record = *I;
// Figure out whether the current class has a template:
ClassTemplateDecl *CTD;
// A specialization of the template...
if (isa<ClassTemplateSpecializationDecl>(Record)) {
CTD = cast<ClassTemplateSpecializationDecl>(Record)
->getSpecializedTemplate();
// ... or the template pattern itself.
} else {
CTD = Record->getDescribedClassTemplate();
if (!CTD) continue;
}
// It's a match.
if (Friend == CTD->getCanonicalDecl())
return AR_accessible;
// If the context isn't dependent, it can't be a dependent match.
if (!EC.isDependent())
continue;
// If the template names don't match, it can't be a dependent
// match.
if (CTD->getDeclName() != Friend->getDeclName())
continue;
// If the class's context can't instantiate to the friend's
// context, it can't be a dependent match.
if (!MightInstantiateTo(S, CTD->getDeclContext(),
Friend->getDeclContext()))
continue;
// Otherwise, it's a dependent match.
OnFailure = AR_dependent;
}
return OnFailure;
}
/// Determines whether the given friend function matches anything in
/// the effective context.
static AccessResult MatchesFriend(Sema &S,
const EffectiveContext &EC,
FunctionDecl *Friend) {
AccessResult OnFailure = AR_inaccessible;
for (SmallVectorImpl<FunctionDecl*>::const_iterator
I = EC.Functions.begin(), E = EC.Functions.end(); I != E; ++I) {
if (Friend == *I)
return AR_accessible;
if (EC.isDependent() && MightInstantiateTo(S, *I, Friend))
OnFailure = AR_dependent;
}
return OnFailure;
}
/// Determines whether the given friend function template matches
/// anything in the effective context.
static AccessResult MatchesFriend(Sema &S,
const EffectiveContext &EC,
FunctionTemplateDecl *Friend) {
if (EC.Functions.empty()) return AR_inaccessible;
AccessResult OnFailure = AR_inaccessible;
for (SmallVectorImpl<FunctionDecl*>::const_iterator
I = EC.Functions.begin(), E = EC.Functions.end(); I != E; ++I) {
FunctionTemplateDecl *FTD = (*I)->getPrimaryTemplate();
if (!FTD)
FTD = (*I)->getDescribedFunctionTemplate();
if (!FTD)
continue;
FTD = FTD->getCanonicalDecl();
if (Friend == FTD)
return AR_accessible;
if (EC.isDependent() && MightInstantiateTo(S, FTD, Friend))
OnFailure = AR_dependent;
}
return OnFailure;
}
/// Determines whether the given friend declaration matches anything
/// in the effective context.
static AccessResult MatchesFriend(Sema &S,
const EffectiveContext &EC,
FriendDecl *FriendD) {
// Whitelist accesses if there's an invalid or unsupported friend
// declaration.
if (FriendD->isInvalidDecl() || FriendD->isUnsupportedFriend())
return AR_accessible;
if (TypeSourceInfo *T = FriendD->getFriendType())
return MatchesFriend(S, EC, T->getType()->getCanonicalTypeUnqualified());
NamedDecl *Friend
= cast<NamedDecl>(FriendD->getFriendDecl()->getCanonicalDecl());
// FIXME: declarations with dependent or templated scope.
if (isa<ClassTemplateDecl>(Friend))
return MatchesFriend(S, EC, cast<ClassTemplateDecl>(Friend));
if (isa<FunctionTemplateDecl>(Friend))
return MatchesFriend(S, EC, cast<FunctionTemplateDecl>(Friend));
if (isa<CXXRecordDecl>(Friend))
return MatchesFriend(S, EC, cast<CXXRecordDecl>(Friend));
assert(isa<FunctionDecl>(Friend) && "unknown friend decl kind");
return MatchesFriend(S, EC, cast<FunctionDecl>(Friend));
}
static AccessResult GetFriendKind(Sema &S,
const EffectiveContext &EC,
const CXXRecordDecl *Class) {
AccessResult OnFailure = AR_inaccessible;
// Okay, check friends.
for (auto *Friend : Class->friends()) {
switch (MatchesFriend(S, EC, Friend)) {
case AR_accessible:
return AR_accessible;
case AR_inaccessible:
continue;
case AR_dependent:
OnFailure = AR_dependent;
break;
}
}
// That's it, give up.
return OnFailure;
}
namespace {
/// A helper class for checking for a friend which will grant access
/// to a protected instance member.
struct ProtectedFriendContext {
Sema &S;
const EffectiveContext &EC;
const CXXRecordDecl *NamingClass;
bool CheckDependent;
bool EverDependent;
/// The path down to the current base class.
SmallVector<const CXXRecordDecl*, 20> CurPath;
ProtectedFriendContext(Sema &S, const EffectiveContext &EC,
const CXXRecordDecl *InstanceContext,
const CXXRecordDecl *NamingClass)
: S(S), EC(EC), NamingClass(NamingClass),
CheckDependent(InstanceContext->isDependentContext() ||
NamingClass->isDependentContext()),
EverDependent(false) {}
/// Check classes in the current path for friendship, starting at
/// the given index.
bool checkFriendshipAlongPath(unsigned I) {
assert(I < CurPath.size());
for (unsigned E = CurPath.size(); I != E; ++I) {
switch (GetFriendKind(S, EC, CurPath[I])) {
case AR_accessible: return true;
case AR_inaccessible: continue;
case AR_dependent: EverDependent = true; continue;
}
}
return false;
}
/// Perform a search starting at the given class.
///
/// PrivateDepth is the index of the last (least derived) class
/// along the current path such that a notional public member of
/// the final class in the path would have access in that class.
bool findFriendship(const CXXRecordDecl *Cur, unsigned PrivateDepth) {
// If we ever reach the naming class, check the current path for
// friendship. We can also stop recursing because we obviously
// won't find the naming class there again.
if (Cur == NamingClass)
return checkFriendshipAlongPath(PrivateDepth);
if (CheckDependent && MightInstantiateTo(Cur, NamingClass))
EverDependent = true;
// Recurse into the base classes.
for (const auto &I : Cur->bases()) {
// If this is private inheritance, then a public member of the
// base will not have any access in classes derived from Cur.
unsigned BasePrivateDepth = PrivateDepth;
if (I.getAccessSpecifier() == AS_private)
BasePrivateDepth = CurPath.size() - 1;
const CXXRecordDecl *RD;
QualType T = I.getType();
if (const RecordType *RT = T->getAs<RecordType>()) {
RD = cast<CXXRecordDecl>(RT->getDecl());
} else if (const InjectedClassNameType *IT
= T->getAs<InjectedClassNameType>()) {
RD = IT->getDecl();
} else {
assert(T->isDependentType() && "non-dependent base wasn't a record?");
EverDependent = true;
continue;
}
// Recurse. We don't need to clean up if this returns true.
CurPath.push_back(RD);
if (findFriendship(RD->getCanonicalDecl(), BasePrivateDepth))
return true;
CurPath.pop_back();
}
return false;
}
bool findFriendship(const CXXRecordDecl *Cur) {
assert(CurPath.empty());
CurPath.push_back(Cur);
return findFriendship(Cur, 0);
}
};
}
/// Search for a class P that EC is a friend of, under the constraint
/// InstanceContext <= P
/// if InstanceContext exists, or else
/// NamingClass <= P
/// and with the additional restriction that a protected member of
/// NamingClass would have some natural access in P, which implicitly
/// imposes the constraint that P <= NamingClass.
///
/// This isn't quite the condition laid out in the standard.
/// Instead of saying that a notional protected member of NamingClass
/// would have to have some natural access in P, it says the actual
/// target has to have some natural access in P, which opens up the
/// possibility that the target (which is not necessarily a member
/// of NamingClass) might be more accessible along some path not
/// passing through it. That's really a bad idea, though, because it
/// introduces two problems:
/// - Most importantly, it breaks encapsulation because you can
/// access a forbidden base class's members by directly subclassing
/// it elsewhere.
/// - It also makes access substantially harder to compute because it
/// breaks the hill-climbing algorithm: knowing that the target is
/// accessible in some base class would no longer let you change
/// the question solely to whether the base class is accessible,
/// because the original target might have been more accessible
/// because of crazy subclassing.
/// So we don't implement that.
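///
/// A minimal illustration of the friendship being searched for (a sketch,
/// not from the surrounding code):
///   class N { protected: int m; };
///   class P : public N { friend void f(P &); };
///   void f(P &p) { p.m = 0; } // OK: the access occurs in a friend of P,
///                             // P is derived from N, and the object
///                             // expression has type P.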
static AccessResult GetProtectedFriendKind(Sema &S, const EffectiveContext &EC,
const CXXRecordDecl *InstanceContext,
const CXXRecordDecl *NamingClass) {
assert(InstanceContext == nullptr ||
InstanceContext->getCanonicalDecl() == InstanceContext);
assert(NamingClass->getCanonicalDecl() == NamingClass);
// If we don't have an instance context, our constraints give us
// that NamingClass <= P <= NamingClass, i.e. P == NamingClass.
// This is just the usual friendship check.
if (!InstanceContext) return GetFriendKind(S, EC, NamingClass);
ProtectedFriendContext PRC(S, EC, InstanceContext, NamingClass);
if (PRC.findFriendship(InstanceContext)) return AR_accessible;
if (PRC.EverDependent) return AR_dependent;
return AR_inaccessible;
}
static AccessResult HasAccess(Sema &S,
const EffectiveContext &EC,
const CXXRecordDecl *NamingClass,
AccessSpecifier Access,
const AccessTarget &Target) {
assert(NamingClass->getCanonicalDecl() == NamingClass &&
"declaration should be canonicalized before being passed here");
if (Access == AS_public) return AR_accessible;
assert(Access == AS_private || Access == AS_protected);
AccessResult OnFailure = AR_inaccessible;
for (EffectiveContext::record_iterator
I = EC.Records.begin(), E = EC.Records.end(); I != E; ++I) {
// All the declarations in EC have been canonicalized, so pointer
// equality from this point on will work fine.
const CXXRecordDecl *ECRecord = *I;
// [B2] and [M2]
if (Access == AS_private) {
if (ECRecord == NamingClass)
return AR_accessible;
if (EC.isDependent() && MightInstantiateTo(ECRecord, NamingClass))
OnFailure = AR_dependent;
// [B3] and [M3]
} else {
assert(Access == AS_protected);
switch (IsDerivedFromInclusive(ECRecord, NamingClass)) {
case AR_accessible: break;
case AR_inaccessible: continue;
case AR_dependent: OnFailure = AR_dependent; continue;
}
// C++ [class.protected]p1:
// An additional access check beyond those described earlier in
// [class.access] is applied when a non-static data member or
// non-static member function is a protected member of its naming
// class. As described earlier, access to a protected member is
// granted because the reference occurs in a friend or member of
// some class C. If the access is to form a pointer to member,
// the nested-name-specifier shall name C or a class derived from
// C. All other accesses involve a (possibly implicit) object
// expression. In this case, the class of the object expression
// shall be C or a class derived from C.
//
// We interpret this as a restriction on [M3].
// In this part of the code, 'C' is just our context class ECRecord.
// These rules are different if we don't have an instance context.
if (!Target.hasInstanceContext()) {
// If it's not an instance member, these restrictions don't apply.
if (!Target.isInstanceMember()) return AR_accessible;
// If it's an instance member, use the pointer-to-member rule
// that the naming class has to be derived from the effective
// context.
// Emulate a MSVC bug where the creation of pointer-to-member
// to protected member of base class is allowed but only from
// static member functions.
if (S.getLangOpts().MSVCCompat && !EC.Functions.empty())
if (CXXMethodDecl* MD = dyn_cast<CXXMethodDecl>(EC.Functions.front()))
if (MD->isStatic()) return AR_accessible;
// Despite the standard's confident wording, there is a case
// where you can have an instance member that's neither in a
// pointer-to-member expression nor in a member access: when
// it names a field in an unevaluated context that can't be an
// implicit member. Pending clarification, we just apply the
// same naming-class restriction here.
// FIXME: we're probably not correctly adding the
// protected-member restriction when we retroactively convert
// an expression to being evaluated.
// We know that ECRecord derives from NamingClass. The
// restriction says to check whether NamingClass derives from
// ECRecord, but that's not really necessary: two distinct
// classes can't be recursively derived from each other. So
// along this path, we just need to check whether the classes
// are equal.
if (NamingClass == ECRecord) return AR_accessible;
// Otherwise, this context class tells us nothing; on to the next.
continue;
}
assert(Target.isInstanceMember());
const CXXRecordDecl *InstanceContext = Target.resolveInstanceContext(S);
if (!InstanceContext) {
OnFailure = AR_dependent;
continue;
}
switch (IsDerivedFromInclusive(InstanceContext, ECRecord)) {
case AR_accessible: return AR_accessible;
case AR_inaccessible: continue;
case AR_dependent: OnFailure = AR_dependent; continue;
}
}
}
// [M3] and [B3] say that, if the target is protected in N, we grant
// access if the access occurs in a friend or member of some class P
// that's a subclass of N and where the target has some natural
// access in P. The 'member' aspect is easy to handle because P
// would necessarily be one of the effective-context records, and we
// address that above. The 'friend' aspect is completely ridiculous
// to implement because there are no restrictions at all on P
// *unless* the [class.protected] restriction applies. If it does,
// however, we should ignore whether the naming class is a friend,
// and instead rely on whether any potential P is a friend.
if (Access == AS_protected && Target.isInstanceMember()) {
// Compute the instance context if possible.
const CXXRecordDecl *InstanceContext = nullptr;
if (Target.hasInstanceContext()) {
InstanceContext = Target.resolveInstanceContext(S);
if (!InstanceContext) return AR_dependent;
}
switch (GetProtectedFriendKind(S, EC, InstanceContext, NamingClass)) {
case AR_accessible: return AR_accessible;
case AR_inaccessible: return OnFailure;
case AR_dependent: return AR_dependent;
}
llvm_unreachable("impossible friendship kind");
}
switch (GetFriendKind(S, EC, NamingClass)) {
case AR_accessible: return AR_accessible;
case AR_inaccessible: return OnFailure;
case AR_dependent: return AR_dependent;
}
// Silence bogus warnings
llvm_unreachable("impossible friendship kind");
}
/// Finds the best path from the naming class to the declaring class,
/// taking friend declarations into account.
///
/// C++0x [class.access.base]p5:
/// A member m is accessible at the point R when named in class N if
/// [M1] m as a member of N is public, or
/// [M2] m as a member of N is private, and R occurs in a member or
/// friend of class N, or
/// [M3] m as a member of N is protected, and R occurs in a member or
/// friend of class N, or in a member or friend of a class P
/// derived from N, where m as a member of P is public, private,
/// or protected, or
/// [M4] there exists a base class B of N that is accessible at R, and
/// m is accessible at R when named in class B.
///
/// C++0x [class.access.base]p4:
/// A base class B of N is accessible at R, if
/// [B1] an invented public member of B would be a public member of N, or
/// [B2] R occurs in a member or friend of class N, and an invented public
/// member of B would be a private or protected member of N, or
/// [B3] R occurs in a member or friend of a class P derived from N, and an
/// invented public member of B would be a private or protected member
/// of P, or
/// [B4] there exists a class S such that B is a base class of S accessible
/// at R and S is a base class of N accessible at R.
///
/// Along a single inheritance path we can restate both of these
/// iteratively:
///
/// First, we note that M1-4 are equivalent to B1-4 if the member is
/// treated as a notional base of its declaring class with inheritance
/// access equivalent to the member's access. Therefore we need only
/// ask whether a class B is accessible from a class N in context R.
///
/// Let B_1 .. B_n be the inheritance path in question (i.e. where
/// B_1 = N, B_n = B, and for all i, B_{i+1} is a direct base class of
/// B_i). For i in 1..n, we will calculate ACAB(i), the access to the
/// closest accessible base in the path:
/// Access(a, b) = (* access on the base specifier from a to b *)
/// Merge(a, forbidden) = forbidden
/// Merge(a, private) = forbidden
/// Merge(a, b) = min(a,b)
/// Accessible(c, forbidden) = false
/// Accessible(c, private) = (R is c) || IsFriend(c, R)
/// Accessible(c, protected) = (R derived from c) || IsFriend(c, R)
/// Accessible(c, public) = true
/// ACAB(n) = public
/// ACAB(i) =
/// let AccessToBase = Merge(Access(B_i, B_{i+1}), ACAB(i+1)) in
/// if Accessible(B_i, AccessToBase) then public else AccessToBase
///
/// B is an accessible base of N at R iff ACAB(1) = public.
///
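/// A worked example (illustrative only): given
///   class B {};
///   class N : private B { friend void r(); };
/// and R inside r(), the path is B_1 = N, B_2 = B, so ACAB(2) = public.
/// For ACAB(1): AccessToBase = Merge(Access(N, B) = private, public) =
/// private, and Accessible(N, private) holds because r() is a friend of
/// N, so ACAB(1) = public and B is an accessible base of N at R.
/// Without that friendship, ACAB(1) would remain private and the base
/// would be inaccessible.
///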
/// \param FinalAccess the access of the "final step", or AS_public if
/// there is no final step.
/// \return null if friendship is dependent
static CXXBasePath *FindBestPath(Sema &S,
const EffectiveContext &EC,
AccessTarget &Target,
AccessSpecifier FinalAccess,
CXXBasePaths &Paths) {
// Derive the paths to the desired base.
const CXXRecordDecl *Derived = Target.getNamingClass();
const CXXRecordDecl *Base = Target.getDeclaringClass();
// FIXME: fail correctly when there are dependent paths.
bool isDerived = Derived->isDerivedFrom(const_cast<CXXRecordDecl*>(Base),
Paths);
assert(isDerived && "derived class not actually derived from base");
(void) isDerived;
CXXBasePath *BestPath = nullptr;
assert(FinalAccess != AS_none && "forbidden access after declaring class");
bool AnyDependent = false;
// Derive the friend-modified access along each path.
for (CXXBasePaths::paths_iterator PI = Paths.begin(), PE = Paths.end();
PI != PE; ++PI) {
AccessTarget::SavedInstanceContext _ = Target.saveInstanceContext();
// Walk through the path backwards.
AccessSpecifier PathAccess = FinalAccess;
CXXBasePath::iterator I = PI->end(), E = PI->begin();
while (I != E) {
--I;
assert(PathAccess != AS_none);
// If the declaration is a private member of a base class, there
// is no level of friendship in derived classes that can make it
// accessible.
if (PathAccess == AS_private) {
PathAccess = AS_none;
break;
}
const CXXRecordDecl *NC = I->Class->getCanonicalDecl();
AccessSpecifier BaseAccess = I->Base->getAccessSpecifier();
PathAccess = std::max(PathAccess, BaseAccess);
switch (HasAccess(S, EC, NC, PathAccess, Target)) {
case AR_inaccessible: break;
case AR_accessible:
PathAccess = AS_public;
// Future tests are not against members and so do not have
// instance context.
Target.suppressInstanceContext();
break;
case AR_dependent:
AnyDependent = true;
goto Next;
}
}
// Note that we modify the path's Access field to the
// friend-modified access.
if (BestPath == nullptr || PathAccess < BestPath->Access) {
BestPath = &*PI;
BestPath->Access = PathAccess;
// Short-circuit if we found a public path.
if (BestPath->Access == AS_public)
return BestPath;
}
Next: ;
}
assert((!BestPath || BestPath->Access != AS_public) &&
"fell out of loop with public path");
// We didn't find a public path, but at least one path was subject
// to dependent friendship, so delay the check.
if (AnyDependent)
return nullptr;
return BestPath;
}
/// Given that an entity has protected natural access, check whether
/// access might be denied because of the protected member access
/// restriction.
///
/// \return true if a note was emitted
static bool TryDiagnoseProtectedAccess(Sema &S, const EffectiveContext &EC,
AccessTarget &Target) {
// Only applies to instance accesses.
if (!Target.isInstanceMember())
return false;
assert(Target.isMemberAccess());
const CXXRecordDecl *NamingClass = Target.getEffectiveNamingClass();
for (EffectiveContext::record_iterator
I = EC.Records.begin(), E = EC.Records.end(); I != E; ++I) {
const CXXRecordDecl *ECRecord = *I;
switch (IsDerivedFromInclusive(ECRecord, NamingClass)) {
case AR_accessible: break;
case AR_inaccessible: continue;
case AR_dependent: continue;
}
// The effective context is a subclass of the declaring class.
// Check whether the [class.protected] restriction is limiting
// access.
// To get this exactly right, this might need to be checked more
// holistically; it's not necessarily the case that gaining
// access here would grant us access overall.
NamedDecl *D = Target.getTargetDecl();
// If we don't have an instance context, [class.protected] says the
// naming class has to equal the context class.
if (!Target.hasInstanceContext()) {
// If it does, the restriction doesn't apply.
if (NamingClass == ECRecord) continue;
// TODO: it would be great to have a fixit here, since this is
// such an obvious error.
S.Diag(D->getLocation(), diag::note_access_protected_restricted_noobject)
<< S.Context.getTypeDeclType(ECRecord);
return true;
}
const CXXRecordDecl *InstanceContext = Target.resolveInstanceContext(S);
assert(InstanceContext && "diagnosing dependent access");
switch (IsDerivedFromInclusive(InstanceContext, ECRecord)) {
case AR_accessible: continue;
case AR_dependent: continue;
case AR_inaccessible:
break;
}
// Okay, the restriction seems to be what's limiting us.
// Use a special diagnostic for constructors and destructors.
if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D) ||
(isa<FunctionTemplateDecl>(D) &&
isa<CXXConstructorDecl>(
cast<FunctionTemplateDecl>(D)->getTemplatedDecl()))) {
return S.Diag(D->getLocation(),
diag::note_access_protected_restricted_ctordtor)
<< isa<CXXDestructorDecl>(D->getAsFunction());
}
// Otherwise, use the generic diagnostic.
return S.Diag(D->getLocation(),
diag::note_access_protected_restricted_object)
<< S.Context.getTypeDeclType(ECRecord);
}
return false;
}
/// We are unable to access a given declaration due to its direct
/// access control; diagnose that.
static void diagnoseBadDirectAccess(Sema &S,
const EffectiveContext &EC,
AccessTarget &entity) {
assert(entity.isMemberAccess());
NamedDecl *D = entity.getTargetDecl();
if (D->getAccess() == AS_protected &&
TryDiagnoseProtectedAccess(S, EC, entity))
return;
// Find an original declaration.
while (D->isOutOfLine()) {
NamedDecl *PrevDecl = nullptr;
if (VarDecl *VD = dyn_cast<VarDecl>(D))
PrevDecl = VD->getPreviousDecl();
else if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
PrevDecl = FD->getPreviousDecl();
else if (TypedefNameDecl *TND = dyn_cast<TypedefNameDecl>(D))
PrevDecl = TND->getPreviousDecl();
else if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
if (isa<RecordDecl>(D) && cast<RecordDecl>(D)->isInjectedClassName())
break;
PrevDecl = TD->getPreviousDecl();
}
if (!PrevDecl) break;
D = PrevDecl;
}
CXXRecordDecl *DeclaringClass = FindDeclaringClass(D);
Decl *ImmediateChild;
if (D->getDeclContext() == DeclaringClass)
ImmediateChild = D;
else {
DeclContext *DC = D->getDeclContext();
while (DC->getParent() != DeclaringClass)
DC = DC->getParent();
ImmediateChild = cast<Decl>(DC);
}
// Check whether there's an AccessSpecDecl preceding this in the
// chain of the DeclContext.
bool isImplicit = true;
for (const auto *I : DeclaringClass->decls()) {
if (I == ImmediateChild) break;
if (isa<AccessSpecDecl>(I)) {
isImplicit = false;
break;
}
}
S.Diag(D->getLocation(), diag::note_access_natural)
<< (unsigned) (D->getAccess() == AS_protected)
<< isImplicit;
}
/// Diagnose the path which caused the given declaration or base class
/// to become inaccessible.
static void DiagnoseAccessPath(Sema &S,
const EffectiveContext &EC,
AccessTarget &entity) {
// Save the instance context to preserve invariants.
AccessTarget::SavedInstanceContext _ = entity.saveInstanceContext();
// This basically repeats the main algorithm but keeps some more
// information.
// The natural access so far.
AccessSpecifier accessSoFar = AS_public;
// Check whether we have special rights to the declaring class.
if (entity.isMemberAccess()) {
NamedDecl *D = entity.getTargetDecl();
accessSoFar = D->getAccess();
const CXXRecordDecl *declaringClass = entity.getDeclaringClass();
switch (HasAccess(S, EC, declaringClass, accessSoFar, entity)) {
// If the declaration is accessible when named in its declaring
// class, then we must be constrained by the path.
case AR_accessible:
accessSoFar = AS_public;
entity.suppressInstanceContext();
break;
case AR_inaccessible:
if (accessSoFar == AS_private ||
declaringClass == entity.getEffectiveNamingClass())
return diagnoseBadDirectAccess(S, EC, entity);
break;
case AR_dependent:
llvm_unreachable("cannot diagnose dependent access");
}
}
CXXBasePaths paths;
CXXBasePath &path = *FindBestPath(S, EC, entity, accessSoFar, paths);
assert(path.Access != AS_public);
CXXBasePath::iterator i = path.end(), e = path.begin();
CXXBasePath::iterator constrainingBase = i;
while (i != e) {
--i;
assert(accessSoFar != AS_none && accessSoFar != AS_private);
// Is the entity accessible when named in the deriving class, as
// modified by the base specifier?
const CXXRecordDecl *derivingClass = i->Class->getCanonicalDecl();
const CXXBaseSpecifier *base = i->Base;
// If the access to this base is worse than the access we have to
// the declaration, remember it.
AccessSpecifier baseAccess = base->getAccessSpecifier();
if (baseAccess > accessSoFar) {
constrainingBase = i;
accessSoFar = baseAccess;
}
switch (HasAccess(S, EC, derivingClass, accessSoFar, entity)) {
case AR_inaccessible: break;
case AR_accessible:
accessSoFar = AS_public;
entity.suppressInstanceContext();
constrainingBase = nullptr;
break;
case AR_dependent:
llvm_unreachable("cannot diagnose dependent access");
}
// If this was private inheritance, but we don't have access to
// the deriving class, we're done.
if (accessSoFar == AS_private) {
assert(baseAccess == AS_private);
assert(constrainingBase == i);
break;
}
}
// If we don't have a constraining base, the access failure must be
// due to the original declaration.
if (constrainingBase == path.end())
return diagnoseBadDirectAccess(S, EC, entity);
// We're constrained by inheritance, but we want to say
// "declared private here" if we're diagnosing a hierarchy
// conversion and this is the final step.
unsigned diagnostic;
if (entity.isMemberAccess() ||
constrainingBase + 1 != path.end()) {
diagnostic = diag::note_access_constrained_by_path;
} else {
diagnostic = diag::note_access_natural;
}
const CXXBaseSpecifier *base = constrainingBase->Base;
S.Diag(base->getSourceRange().getBegin(), diagnostic)
<< base->getSourceRange()
<< (base->getAccessSpecifier() == AS_protected)
<< (base->getAccessSpecifierAsWritten() == AS_none);
if (entity.isMemberAccess())
S.Diag(entity.getTargetDecl()->getLocation(),
diag::note_member_declared_at);
}
static void DiagnoseBadAccess(Sema &S, SourceLocation Loc,
const EffectiveContext &EC,
AccessTarget &Entity) {
const CXXRecordDecl *NamingClass = Entity.getNamingClass();
const CXXRecordDecl *DeclaringClass = Entity.getDeclaringClass();
NamedDecl *D = (Entity.isMemberAccess() ? Entity.getTargetDecl() : nullptr);
S.Diag(Loc, Entity.getDiag())
<< (Entity.getAccess() == AS_protected)
<< (D ? D->getDeclName() : DeclarationName())
<< S.Context.getTypeDeclType(NamingClass)
<< S.Context.getTypeDeclType(DeclaringClass);
DiagnoseAccessPath(S, EC, Entity);
}
/// MSVC has a bug where, if during a using-declaration name lookup the
/// declaration found is inaccessible (private) and that declaration was
/// brought into scope via another using declaration whose target
/// declaration is accessible (public), then no error is generated.
/// Example:
/// class A {
/// public:
/// int f();
/// };
/// class B : public A {
/// private:
/// using A::f;
/// };
/// class C : public B {
/// private:
/// using B::f;
/// };
///
/// Here, B::f is private so this should fail in Standard C++, but
/// because B::f refers to A::f which is public MSVC accepts it.
static bool IsMicrosoftUsingDeclarationAccessBug(Sema& S,
SourceLocation AccessLoc,
AccessTarget &Entity) {
if (UsingShadowDecl *Shadow =
dyn_cast<UsingShadowDecl>(Entity.getTargetDecl()))
if (UsingDecl *UD = dyn_cast<UsingDecl>(Shadow->getIntroducer())) {
const NamedDecl *OrigDecl = Entity.getTargetDecl()->getUnderlyingDecl();
if (Entity.getTargetDecl()->getAccess() == AS_private &&
(OrigDecl->getAccess() == AS_public ||
OrigDecl->getAccess() == AS_protected)) {
S.Diag(AccessLoc, diag::ext_ms_using_declaration_inaccessible)
<< UD->getQualifiedNameAsString()
<< OrigDecl->getQualifiedNameAsString();
return true;
}
}
return false;
}
/// Determines whether the accessed entity is accessible. Public members
/// have been weeded out by this point.
static AccessResult IsAccessible(Sema &S,
const EffectiveContext &EC,
AccessTarget &Entity) {
// Determine the actual naming class.
const CXXRecordDecl *NamingClass = Entity.getEffectiveNamingClass();
AccessSpecifier UnprivilegedAccess = Entity.getAccess();
assert(UnprivilegedAccess != AS_public && "public access not weeded out");
// Before we try to recalculate access paths, try to white-list
// accesses which just trade in on the final step, i.e. accesses
// which don't require [M4] or [B4]. These are by far the most
// common forms of privileged access.
if (UnprivilegedAccess != AS_none) {
switch (HasAccess(S, EC, NamingClass, UnprivilegedAccess, Entity)) {
case AR_dependent:
// This is actually an interesting policy decision. We don't
// *have* to delay immediately here: we can do the full access
// calculation in the hope that friendship on some intermediate
// class will make the declaration accessible non-dependently.
// But that's not cheap, and odds are very good (note: assertion
// made without data) that the friend declaration will determine
// access.
return AR_dependent;
case AR_accessible: return AR_accessible;
case AR_inaccessible: break;
}
}
AccessTarget::SavedInstanceContext _ = Entity.saveInstanceContext();
// We lower member accesses to base accesses by pretending that the
// member is a base class of its declaring class.
AccessSpecifier FinalAccess;
if (Entity.isMemberAccess()) {
// Determine if the declaration is accessible from EC when named
// in its declaring class.
NamedDecl *Target = Entity.getTargetDecl();
const CXXRecordDecl *DeclaringClass = Entity.getDeclaringClass();
FinalAccess = Target->getAccess();
switch (HasAccess(S, EC, DeclaringClass, FinalAccess, Entity)) {
case AR_accessible:
// Target is accessible at EC when named in its declaring class.
// We can now hill-climb and simply check whether the declaring
// class is accessible as a base of the naming class. This is
// equivalent to checking the access of a notional public
// member with no instance context.
FinalAccess = AS_public;
Entity.suppressInstanceContext();
break;
case AR_inaccessible: break;
case AR_dependent: return AR_dependent; // see above
}
if (DeclaringClass == NamingClass)
return (FinalAccess == AS_public ? AR_accessible : AR_inaccessible);
} else {
FinalAccess = AS_public;
}
assert(Entity.getDeclaringClass() != NamingClass);
// Append the declaration's access if applicable.
CXXBasePaths Paths;
CXXBasePath *Path = FindBestPath(S, EC, Entity, FinalAccess, Paths);
if (!Path)
return AR_dependent;
assert(Path->Access <= UnprivilegedAccess &&
"access along best path worse than direct?");
if (Path->Access == AS_public)
return AR_accessible;
return AR_inaccessible;
}
static void DelayDependentAccess(Sema &S,
const EffectiveContext &EC,
SourceLocation Loc,
const AccessTarget &Entity) {
assert(EC.isDependent() && "delaying non-dependent access");
DeclContext *DC = EC.getInnerContext();
assert(DC->isDependentContext() && "delaying non-dependent access");
DependentDiagnostic::Create(S.Context, DC, DependentDiagnostic::Access,
Loc,
Entity.isMemberAccess(),
Entity.getAccess(),
Entity.getTargetDecl(),
Entity.getNamingClass(),
Entity.getBaseObjectType(),
Entity.getDiag());
}
/// Checks access to an entity from the given effective context.
static AccessResult CheckEffectiveAccess(Sema &S,
const EffectiveContext &EC,
SourceLocation Loc,
AccessTarget &Entity) {
assert(Entity.getAccess() != AS_public && "called for public access!");
switch (IsAccessible(S, EC, Entity)) {
case AR_dependent:
DelayDependentAccess(S, EC, Loc, Entity);
return AR_dependent;
case AR_inaccessible:
if (S.getLangOpts().MSVCCompat &&
IsMicrosoftUsingDeclarationAccessBug(S, Loc, Entity))
return AR_accessible;
if (!Entity.isQuiet())
DiagnoseBadAccess(S, Loc, EC, Entity);
return AR_inaccessible;
case AR_accessible:
return AR_accessible;
}
// silence unnecessary warning
llvm_unreachable("invalid access result");
}
static Sema::AccessResult CheckAccess(Sema &S, SourceLocation Loc,
AccessTarget &Entity) {
// If the access path is public, it's accessible everywhere.
if (Entity.getAccess() == AS_public)
return Sema::AR_accessible;
// If we're currently parsing a declaration, we may need to delay
// access control checking, because our effective context might be
// different based on what the declaration comes out as.
//
// For example, we might be parsing a declaration with a scope
// specifier, like this:
// A::private_type A::foo() { ... }
//
// Or we might be parsing something that will turn out to be a friend:
// void foo(A::private_type);
// void B::foo(A::private_type);
if (S.DelayedDiagnostics.shouldDelayDiagnostics()) {
S.DelayedDiagnostics.add(DelayedDiagnostic::makeAccess(Loc, Entity));
return Sema::AR_delayed;
}
EffectiveContext EC(S.CurContext);
switch (CheckEffectiveAccess(S, EC, Loc, Entity)) {
case AR_accessible: return Sema::AR_accessible;
case AR_inaccessible: return Sema::AR_inaccessible;
case AR_dependent: return Sema::AR_dependent;
}
llvm_unreachable("invalid access result");
}
void Sema::HandleDelayedAccessCheck(DelayedDiagnostic &DD, Decl *D) {
// Access control for names used in the declarations of functions
// and function templates should normally be evaluated in the context
// of the declaration, just in case it's a friend of something.
// However, this does not apply to local extern declarations.
DeclContext *DC = D->getDeclContext();
if (D->isLocalExternDecl()) {
DC = D->getLexicalDeclContext();
} else if (FunctionDecl *FN = dyn_cast<FunctionDecl>(D)) {
DC = FN;
} else if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D)) {
if (isa<DeclContext>(TD->getTemplatedDecl()))
DC = cast<DeclContext>(TD->getTemplatedDecl());
} else if (auto *RD = dyn_cast<RequiresExprBodyDecl>(D)) {
DC = RD;
}
EffectiveContext EC(DC);
AccessTarget Target(DD.getAccessData());
if (CheckEffectiveAccess(*this, EC, DD.Loc, Target) == ::AR_inaccessible)
DD.Triggered = true;
}
void Sema::HandleDependentAccessCheck(const DependentDiagnostic &DD,
const MultiLevelTemplateArgumentList &TemplateArgs) {
SourceLocation Loc = DD.getAccessLoc();
AccessSpecifier Access = DD.getAccess();
Decl *NamingD = FindInstantiatedDecl(Loc, DD.getAccessNamingClass(),
TemplateArgs);
if (!NamingD) return;
Decl *TargetD = FindInstantiatedDecl(Loc, DD.getAccessTarget(),
TemplateArgs);
if (!TargetD) return;
if (DD.isAccessToMember()) {
CXXRecordDecl *NamingClass = cast<CXXRecordDecl>(NamingD);
NamedDecl *TargetDecl = cast<NamedDecl>(TargetD);
QualType BaseObjectType = DD.getAccessBaseObjectType();
if (!BaseObjectType.isNull()) {
BaseObjectType = SubstType(BaseObjectType, TemplateArgs, Loc,
DeclarationName());
if (BaseObjectType.isNull()) return;
}
AccessTarget Entity(Context,
AccessTarget::Member,
NamingClass,
DeclAccessPair::make(TargetDecl, Access),
BaseObjectType);
Entity.setDiag(DD.getDiagnostic());
CheckAccess(*this, Loc, Entity);
} else {
AccessTarget Entity(Context,
AccessTarget::Base,
cast<CXXRecordDecl>(TargetD),
cast<CXXRecordDecl>(NamingD),
Access);
Entity.setDiag(DD.getDiagnostic());
CheckAccess(*this, Loc, Entity);
}
}
Sema::AccessResult Sema::CheckUnresolvedLookupAccess(UnresolvedLookupExpr *E,
DeclAccessPair Found) {
if (!getLangOpts().AccessControl ||
!E->getNamingClass() ||
Found.getAccess() == AS_public)
return AR_accessible;
AccessTarget Entity(Context, AccessTarget::Member, E->getNamingClass(),
Found, QualType());
Entity.setDiag(diag::err_access) << E->getSourceRange();
return CheckAccess(*this, E->getNameLoc(), Entity);
}
/// Perform access-control checking on a previously-unresolved member
/// access which has now been resolved to a member.
Sema::AccessResult Sema::CheckUnresolvedMemberAccess(UnresolvedMemberExpr *E,
DeclAccessPair Found) {
if (!getLangOpts().AccessControl ||
Found.getAccess() == AS_public)
return AR_accessible;
QualType BaseType = E->getBaseType();
if (E->isArrow())
BaseType = BaseType->castAs<PointerType>()->getPointeeType();
AccessTarget Entity(Context, AccessTarget::Member, E->getNamingClass(),
Found, BaseType);
Entity.setDiag(diag::err_access) << E->getSourceRange();
return CheckAccess(*this, E->getMemberLoc(), Entity);
}
/// Is the given member accessible for the purposes of deciding whether to
/// define a special member function as deleted?
bool Sema::isMemberAccessibleForDeletion(CXXRecordDecl *NamingClass,
DeclAccessPair Found,
QualType ObjectType,
SourceLocation Loc,
const PartialDiagnostic &Diag) {
// Fast path.
if (Found.getAccess() == AS_public || !getLangOpts().AccessControl)
return true;
AccessTarget Entity(Context, AccessTarget::Member, NamingClass, Found,
ObjectType);
// Suppress diagnostics.
Entity.setDiag(Diag);
switch (CheckAccess(*this, Loc, Entity)) {
case AR_accessible: return true;
case AR_inaccessible: return false;
case AR_dependent: llvm_unreachable("dependent for =delete computation");
case AR_delayed: llvm_unreachable("cannot delay =delete computation");
}
llvm_unreachable("bad access result");
}
Sema::AccessResult Sema::CheckDestructorAccess(SourceLocation Loc,
CXXDestructorDecl *Dtor,
const PartialDiagnostic &PDiag,
QualType ObjectTy) {
if (!getLangOpts().AccessControl)
return AR_accessible;
// There's never a path involved when checking implicit destructor access.
AccessSpecifier Access = Dtor->getAccess();
if (Access == AS_public)
return AR_accessible;
CXXRecordDecl *NamingClass = Dtor->getParent();
if (ObjectTy.isNull()) ObjectTy = Context.getTypeDeclType(NamingClass);
AccessTarget Entity(Context, AccessTarget::Member, NamingClass,
DeclAccessPair::make(Dtor, Access),
ObjectTy);
Entity.setDiag(PDiag); // TODO: avoid copy
return CheckAccess(*this, Loc, Entity);
}
/// Checks access to a constructor.
Sema::AccessResult Sema::CheckConstructorAccess(SourceLocation UseLoc,
CXXConstructorDecl *Constructor,
DeclAccessPair Found,
const InitializedEntity &Entity,
bool IsCopyBindingRefToTemp) {
if (!getLangOpts().AccessControl || Found.getAccess() == AS_public)
return AR_accessible;
PartialDiagnostic PD(PDiag());
switch (Entity.getKind()) {
default:
PD = PDiag(IsCopyBindingRefToTemp
? diag::ext_rvalue_to_reference_access_ctor
: diag::err_access_ctor);
break;
case InitializedEntity::EK_Base:
PD = PDiag(diag::err_access_base_ctor);
PD << Entity.isInheritedVirtualBase()
<< Entity.getBaseSpecifier()->getType() << getSpecialMember(Constructor);
break;
- case InitializedEntity::EK_Member: {
+ case InitializedEntity::EK_Member:
+ case InitializedEntity::EK_ParenAggInitMember: {
const FieldDecl *Field = cast<FieldDecl>(Entity.getDecl());
PD = PDiag(diag::err_access_field_ctor);
PD << Field->getType() << getSpecialMember(Constructor);
break;
}
case InitializedEntity::EK_LambdaCapture: {
StringRef VarName = Entity.getCapturedVarName();
PD = PDiag(diag::err_access_lambda_capture);
PD << VarName << Entity.getType() << getSpecialMember(Constructor);
break;
}
}
return CheckConstructorAccess(UseLoc, Constructor, Found, Entity, PD);
}
/// Checks access to a constructor.
Sema::AccessResult Sema::CheckConstructorAccess(SourceLocation UseLoc,
CXXConstructorDecl *Constructor,
DeclAccessPair Found,
const InitializedEntity &Entity,
const PartialDiagnostic &PD) {
if (!getLangOpts().AccessControl ||
Found.getAccess() == AS_public)
return AR_accessible;
CXXRecordDecl *NamingClass = Constructor->getParent();
// Initializing a base sub-object is an instance method call on an
// object of the derived class. Otherwise, we have an instance method
// call on an object of the constructed type.
//
// FIXME: If we have a parent, we're initializing the base class subobject
// in aggregate initialization. It's not clear whether the object class
// should be the base class or the derived class in that case.
CXXRecordDecl *ObjectClass;
if ((Entity.getKind() == InitializedEntity::EK_Base ||
Entity.getKind() == InitializedEntity::EK_Delegating) &&
!Entity.getParent()) {
ObjectClass = cast<CXXConstructorDecl>(CurContext)->getParent();
} else if (auto *Shadow =
dyn_cast<ConstructorUsingShadowDecl>(Found.getDecl())) {
// If we're using an inheriting constructor to construct an object,
// the object class is the derived class, not the base class.
ObjectClass = Shadow->getParent();
} else {
ObjectClass = NamingClass;
}
AccessTarget AccessEntity(
Context, AccessTarget::Member, NamingClass,
DeclAccessPair::make(Constructor, Found.getAccess()),
Context.getTypeDeclType(ObjectClass));
AccessEntity.setDiag(PD);
return CheckAccess(*this, UseLoc, AccessEntity);
}
/// Checks access to an overloaded operator new or delete.
Sema::AccessResult Sema::CheckAllocationAccess(SourceLocation OpLoc,
SourceRange PlacementRange,
CXXRecordDecl *NamingClass,
DeclAccessPair Found,
bool Diagnose) {
if (!getLangOpts().AccessControl ||
!NamingClass ||
Found.getAccess() == AS_public)
return AR_accessible;
AccessTarget Entity(Context, AccessTarget::Member, NamingClass, Found,
QualType());
if (Diagnose)
Entity.setDiag(diag::err_access)
<< PlacementRange;
return CheckAccess(*this, OpLoc, Entity);
}
/// Checks access to a member.
Sema::AccessResult Sema::CheckMemberAccess(SourceLocation UseLoc,
CXXRecordDecl *NamingClass,
DeclAccessPair Found) {
if (!getLangOpts().AccessControl ||
!NamingClass ||
Found.getAccess() == AS_public)
return AR_accessible;
AccessTarget Entity(Context, AccessTarget::Member, NamingClass,
Found, QualType());
return CheckAccess(*this, UseLoc, Entity);
}
/// Checks implicit access to a member in a structured binding.
Sema::AccessResult
Sema::CheckStructuredBindingMemberAccess(SourceLocation UseLoc,
CXXRecordDecl *DecomposedClass,
DeclAccessPair Field) {
if (!getLangOpts().AccessControl ||
Field.getAccess() == AS_public)
return AR_accessible;
AccessTarget Entity(Context, AccessTarget::Member, DecomposedClass, Field,
Context.getRecordType(DecomposedClass));
Entity.setDiag(diag::err_decomp_decl_inaccessible_field);
return CheckAccess(*this, UseLoc, Entity);
}
Sema::AccessResult Sema::CheckMemberOperatorAccess(SourceLocation OpLoc,
Expr *ObjectExpr,
const SourceRange &Range,
DeclAccessPair Found) {
if (!getLangOpts().AccessControl || Found.getAccess() == AS_public)
return AR_accessible;
const RecordType *RT = ObjectExpr->getType()->castAs<RecordType>();
CXXRecordDecl *NamingClass = cast<CXXRecordDecl>(RT->getDecl());
AccessTarget Entity(Context, AccessTarget::Member, NamingClass, Found,
ObjectExpr->getType());
Entity.setDiag(diag::err_access) << ObjectExpr->getSourceRange() << Range;
return CheckAccess(*this, OpLoc, Entity);
}
/// Checks access to an overloaded member operator, including
/// conversion operators.
Sema::AccessResult Sema::CheckMemberOperatorAccess(SourceLocation OpLoc,
Expr *ObjectExpr,
Expr *ArgExpr,
DeclAccessPair Found) {
return CheckMemberOperatorAccess(
OpLoc, ObjectExpr, ArgExpr ? ArgExpr->getSourceRange() : SourceRange(),
Found);
}
Sema::AccessResult Sema::CheckMemberOperatorAccess(SourceLocation OpLoc,
Expr *ObjectExpr,
ArrayRef<Expr *> ArgExprs,
DeclAccessPair FoundDecl) {
SourceRange R;
if (!ArgExprs.empty()) {
R = SourceRange(ArgExprs.front()->getBeginLoc(),
ArgExprs.back()->getEndLoc());
}
return CheckMemberOperatorAccess(OpLoc, ObjectExpr, R, FoundDecl);
}
/// Checks access to the target of a friend declaration.
Sema::AccessResult Sema::CheckFriendAccess(NamedDecl *target) {
assert(isa<CXXMethodDecl>(target->getAsFunction()));
// Friendship lookup is a redeclaration lookup, so there's never an
// inheritance path modifying access.
AccessSpecifier access = target->getAccess();
if (!getLangOpts().AccessControl || access == AS_public)
return AR_accessible;
CXXMethodDecl *method = cast<CXXMethodDecl>(target->getAsFunction());
AccessTarget entity(Context, AccessTarget::Member,
cast<CXXRecordDecl>(target->getDeclContext()),
DeclAccessPair::make(target, access),
/*no instance context*/ QualType());
entity.setDiag(diag::err_access_friend_function)
<< (method->getQualifier() ? method->getQualifierLoc().getSourceRange()
: method->getNameInfo().getSourceRange());
// We need to bypass delayed-diagnostics because we might be called
// while the ParsingDeclarator is active.
EffectiveContext EC(CurContext);
switch (CheckEffectiveAccess(*this, EC, target->getLocation(), entity)) {
case ::AR_accessible: return Sema::AR_accessible;
case ::AR_inaccessible: return Sema::AR_inaccessible;
case ::AR_dependent: return Sema::AR_dependent;
}
llvm_unreachable("invalid access result");
}
Sema::AccessResult Sema::CheckAddressOfMemberAccess(Expr *OvlExpr,
DeclAccessPair Found) {
if (!getLangOpts().AccessControl ||
Found.getAccess() == AS_none ||
Found.getAccess() == AS_public)
return AR_accessible;
OverloadExpr *Ovl = OverloadExpr::find(OvlExpr).Expression;
CXXRecordDecl *NamingClass = Ovl->getNamingClass();
AccessTarget Entity(Context, AccessTarget::Member, NamingClass, Found,
/*no instance context*/ QualType());
Entity.setDiag(diag::err_access)
<< Ovl->getSourceRange();
return CheckAccess(*this, Ovl->getNameLoc(), Entity);
}
/// Checks access for a hierarchy conversion.
///
/// \param ForceCheck true if this check should be performed even if access
/// control is disabled; some things rely on this for semantics
/// \param ForceUnprivileged true if this check should proceed as if the
/// context had no special privileges
Sema::AccessResult Sema::CheckBaseClassAccess(SourceLocation AccessLoc,
QualType Base,
QualType Derived,
const CXXBasePath &Path,
unsigned DiagID,
bool ForceCheck,
bool ForceUnprivileged) {
if (!ForceCheck && !getLangOpts().AccessControl)
return AR_accessible;
if (Path.Access == AS_public)
return AR_accessible;
CXXRecordDecl *BaseD, *DerivedD;
BaseD = cast<CXXRecordDecl>(Base->castAs<RecordType>()->getDecl());
DerivedD = cast<CXXRecordDecl>(Derived->castAs<RecordType>()->getDecl());
AccessTarget Entity(Context, AccessTarget::Base, BaseD, DerivedD,
Path.Access);
if (DiagID)
Entity.setDiag(DiagID) << Derived << Base;
if (ForceUnprivileged) {
switch (CheckEffectiveAccess(*this, EffectiveContext(),
AccessLoc, Entity)) {
case ::AR_accessible: return Sema::AR_accessible;
case ::AR_inaccessible: return Sema::AR_inaccessible;
case ::AR_dependent: return Sema::AR_dependent;
}
llvm_unreachable("unexpected result from CheckEffectiveAccess");
}
return CheckAccess(*this, AccessLoc, Entity);
}
/// Checks access to all the declarations in the given result set.
void Sema::CheckLookupAccess(const LookupResult &R) {
assert(getLangOpts().AccessControl
&& "performing access check without access control");
assert(R.getNamingClass() && "performing access check without naming class");
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I) {
if (I.getAccess() != AS_public) {
AccessTarget Entity(Context, AccessedEntity::Member,
R.getNamingClass(), I.getPair(),
R.getBaseObjectType());
Entity.setDiag(diag::err_access);
CheckAccess(*this, R.getNameLoc(), Entity);
}
}
}
/// Checks access to Target from the given class. The check will take access
/// specifiers into account, but no member access expressions and such.
///
/// \param Target the declaration to check if it can be accessed
/// \param NamingClass the class in which the lookup was started.
/// \param BaseType type of the left side of member access expression.
/// \p BaseType and \p NamingClass are used for C++ access control.
/// Depending on the lookup case, they should be set to the following:
/// - lhs.target (member access without a qualifier):
/// \p BaseType and \p NamingClass are both the type of 'lhs'.
/// - lhs.X::target (member access with a qualifier):
/// \p BaseType is the type of 'lhs', \p NamingClass is 'X'.
/// - X::target (qualified lookup without member access):
/// \p BaseType is null, \p NamingClass is 'X'.
/// - target (unqualified lookup):
/// \p BaseType is null, \p NamingClass is the parent class of 'target'.
/// \return true if the Target is accessible from the Class, false otherwise.
bool Sema::IsSimplyAccessible(NamedDecl *Target, CXXRecordDecl *NamingClass,
QualType BaseType) {
// Perform the C++ accessibility checks first.
if (Target->isCXXClassMember() && NamingClass) {
if (!getLangOpts().CPlusPlus)
return false;
// The unprivileged access is AS_none as we don't know how the member was
// accessed, which is described by the access in DeclAccessPair.
// `IsAccessible` will examine the actual access of Target (i.e.
// Decl->getAccess()) when calculating the access.
AccessTarget Entity(Context, AccessedEntity::Member, NamingClass,
DeclAccessPair::make(Target, AS_none), BaseType);
EffectiveContext EC(CurContext);
return ::IsAccessible(*this, EC, Entity) != ::AR_inaccessible;
}
if (ObjCIvarDecl *Ivar = dyn_cast<ObjCIvarDecl>(Target)) {
// @public and @package ivars are always accessible.
if (Ivar->getCanonicalAccessControl() == ObjCIvarDecl::Public ||
Ivar->getCanonicalAccessControl() == ObjCIvarDecl::Package)
return true;
// If we are inside a class or category implementation, determine the
// interface we're in.
ObjCInterfaceDecl *ClassOfMethodDecl = nullptr;
if (ObjCMethodDecl *MD = getCurMethodDecl())
ClassOfMethodDecl = MD->getClassInterface();
else if (FunctionDecl *FD = getCurFunctionDecl()) {
if (ObjCImplDecl *Impl
= dyn_cast<ObjCImplDecl>(FD->getLexicalDeclContext())) {
if (ObjCImplementationDecl *IMPD
= dyn_cast<ObjCImplementationDecl>(Impl))
ClassOfMethodDecl = IMPD->getClassInterface();
else if (ObjCCategoryImplDecl* CatImplClass
= dyn_cast<ObjCCategoryImplDecl>(Impl))
ClassOfMethodDecl = CatImplClass->getClassInterface();
}
}
// If we're not in an interface, this ivar is inaccessible.
if (!ClassOfMethodDecl)
return false;
// If we're inside the same interface that owns the ivar, we're fine.
if (declaresSameEntity(ClassOfMethodDecl, Ivar->getContainingInterface()))
return true;
// If the ivar is private, it's inaccessible.
if (Ivar->getCanonicalAccessControl() == ObjCIvarDecl::Private)
return false;
return Ivar->getContainingInterface()->isSuperClassOf(ClassOfMethodDecl);
}
return true;
}
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp
index 051fad04219f..0fbef1cc8b52 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp
@@ -1,19954 +1,19954 @@
//===--- SemaDecl.cpp - Semantic Analysis for Declarations ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for declarations.
//
//===----------------------------------------------------------------------===//
#include "TypeLocBuilder.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/CommentDiagnostic.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/NonTrivialTypeVisitor.h"
#include "clang/AST/Randstruct.h"
#include "clang/AST/StmtCXX.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/HLSLRuntime.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/HeaderSearch.h" // TODO: Sema shouldn't depend on Lex
#include "clang/Lex/Lexer.h" // TODO: Extract static functions to fix layering.
#include "clang/Lex/ModuleLoader.h" // TODO: Sema shouldn't depend on Lex
#include "clang/Lex/Preprocessor.h" // Included for isCodeCompletionEnabled()
#include "clang/Sema/CXXFieldCollector.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/DelayedDiagnostic.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Triple.h"
#include <algorithm>
#include <cstring>
#include <functional>
#include <optional>
#include <unordered_map>
using namespace clang;
using namespace sema;
Sema::DeclGroupPtrTy Sema::ConvertDeclToDeclGroup(Decl *Ptr, Decl *OwnedType) {
if (OwnedType) {
Decl *Group[2] = { OwnedType, Ptr };
return DeclGroupPtrTy::make(DeclGroupRef::Create(Context, Group, 2));
}
return DeclGroupPtrTy::make(DeclGroupRef(Ptr));
}
namespace {
class TypeNameValidatorCCC final : public CorrectionCandidateCallback {
public:
TypeNameValidatorCCC(bool AllowInvalid, bool WantClass = false,
bool AllowTemplates = false,
bool AllowNonTemplates = true)
: AllowInvalidDecl(AllowInvalid), WantClassName(WantClass),
AllowTemplates(AllowTemplates), AllowNonTemplates(AllowNonTemplates) {
WantExpressionKeywords = false;
WantCXXNamedCasts = false;
WantRemainingKeywords = false;
}
bool ValidateCandidate(const TypoCorrection &candidate) override {
if (NamedDecl *ND = candidate.getCorrectionDecl()) {
if (!AllowInvalidDecl && ND->isInvalidDecl())
return false;
if (getAsTypeTemplateDecl(ND))
return AllowTemplates;
bool IsType = isa<TypeDecl>(ND) || isa<ObjCInterfaceDecl>(ND);
if (!IsType)
return false;
if (AllowNonTemplates)
return true;
// An injected-class-name of a class template (specialization) is valid
// as a template or as a non-template.
if (AllowTemplates) {
auto *RD = dyn_cast<CXXRecordDecl>(ND);
if (!RD || !RD->isInjectedClassName())
return false;
RD = cast<CXXRecordDecl>(RD->getDeclContext());
return RD->getDescribedClassTemplate() ||
isa<ClassTemplateSpecializationDecl>(RD);
}
return false;
}
return !WantClassName && candidate.isKeyword();
}
std::unique_ptr<CorrectionCandidateCallback> clone() override {
return std::make_unique<TypeNameValidatorCCC>(*this);
}
private:
bool AllowInvalidDecl;
bool WantClassName;
bool AllowTemplates;
bool AllowNonTemplates;
};
} // end anonymous namespace
/// Determine whether the token kind starts a simple-type-specifier.
bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
switch (Kind) {
// FIXME: Take into account the current language when deciding whether a
// token kind is a valid type specifier
case tok::kw_short:
case tok::kw_long:
case tok::kw___int64:
case tok::kw___int128:
case tok::kw_signed:
case tok::kw_unsigned:
case tok::kw_void:
case tok::kw_char:
case tok::kw_int:
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
case tok::kw___bf16:
case tok::kw__Float16:
case tok::kw___float128:
case tok::kw___ibm128:
case tok::kw_wchar_t:
case tok::kw_bool:
#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
#include "clang/Basic/TransformTypeTraits.def"
case tok::kw___auto_type:
return true;
case tok::annot_typename:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_typeof:
case tok::annot_decltype:
case tok::kw_decltype:
return getLangOpts().CPlusPlus;
case tok::kw_char8_t:
return getLangOpts().Char8;
default:
break;
}
return false;
}
namespace {
enum class UnqualifiedTypeNameLookupResult {
NotFound,
FoundNonType,
FoundType
};
} // end anonymous namespace
/// Tries to perform unqualified lookup of type decls in the bases of a
/// dependent class.
/// \return \a NotFound if no decls are found, \a FoundNonType if a non-type
/// decl is found, \a FoundType if only type decls are found.
static UnqualifiedTypeNameLookupResult
lookupUnqualifiedTypeNameInBase(Sema &S, const IdentifierInfo &II,
SourceLocation NameLoc,
const CXXRecordDecl *RD) {
if (!RD->hasDefinition())
return UnqualifiedTypeNameLookupResult::NotFound;
// Look for type decls in base classes.
UnqualifiedTypeNameLookupResult FoundTypeDecl =
UnqualifiedTypeNameLookupResult::NotFound;
for (const auto &Base : RD->bases()) {
const CXXRecordDecl *BaseRD = nullptr;
if (auto *BaseTT = Base.getType()->getAs<TagType>())
BaseRD = BaseTT->getAsCXXRecordDecl();
else if (auto *TST = Base.getType()->getAs<TemplateSpecializationType>()) {
// Look for type decls in dependent base classes that have known primary
// templates.
if (!TST || !TST->isDependentType())
continue;
auto *TD = TST->getTemplateName().getAsTemplateDecl();
if (!TD)
continue;
if (auto *BasePrimaryTemplate =
dyn_cast_or_null<CXXRecordDecl>(TD->getTemplatedDecl())) {
if (BasePrimaryTemplate->getCanonicalDecl() != RD->getCanonicalDecl())
BaseRD = BasePrimaryTemplate;
else if (auto *CTD = dyn_cast<ClassTemplateDecl>(TD)) {
if (const ClassTemplatePartialSpecializationDecl *PS =
CTD->findPartialSpecialization(Base.getType()))
if (PS->getCanonicalDecl() != RD->getCanonicalDecl())
BaseRD = PS;
}
}
}
if (BaseRD) {
for (NamedDecl *ND : BaseRD->lookup(&II)) {
if (!isa<TypeDecl>(ND))
return UnqualifiedTypeNameLookupResult::FoundNonType;
FoundTypeDecl = UnqualifiedTypeNameLookupResult::FoundType;
}
if (FoundTypeDecl == UnqualifiedTypeNameLookupResult::NotFound) {
switch (lookupUnqualifiedTypeNameInBase(S, II, NameLoc, BaseRD)) {
case UnqualifiedTypeNameLookupResult::FoundNonType:
return UnqualifiedTypeNameLookupResult::FoundNonType;
case UnqualifiedTypeNameLookupResult::FoundType:
FoundTypeDecl = UnqualifiedTypeNameLookupResult::FoundType;
break;
case UnqualifiedTypeNameLookupResult::NotFound:
break;
}
}
}
}
return FoundTypeDecl;
}
static ParsedType recoverFromTypeInKnownDependentBase(Sema &S,
const IdentifierInfo &II,
SourceLocation NameLoc) {
// Lookup in the parent class template context, if any.
const CXXRecordDecl *RD = nullptr;
UnqualifiedTypeNameLookupResult FoundTypeDecl =
UnqualifiedTypeNameLookupResult::NotFound;
for (DeclContext *DC = S.CurContext;
DC && FoundTypeDecl == UnqualifiedTypeNameLookupResult::NotFound;
DC = DC->getParent()) {
// Look for type decls in dependent base classes that have known primary
// templates.
RD = dyn_cast<CXXRecordDecl>(DC);
if (RD && RD->getDescribedClassTemplate())
FoundTypeDecl = lookupUnqualifiedTypeNameInBase(S, II, NameLoc, RD);
}
if (FoundTypeDecl != UnqualifiedTypeNameLookupResult::FoundType)
return nullptr;
// We found some types in dependent base classes. Recover as if the user
// wrote 'typename MyClass::II' instead of 'II'. We'll fully resolve the
// lookup during template instantiation.
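// An illustrative sketch of the pattern this recovers (MSVC-compat
// extension; the names below are hypothetical):
//   template <typename T> struct Base { typedef int type; };
//   template <typename T> struct Derived : Base<T> {
//     type x; // standard C++ requires 'typename Base<T>::type';
//             // recovered here under -fms-compatibility
//   };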
S.Diag(NameLoc, diag::ext_found_in_dependent_base) << &II;
ASTContext &Context = S.Context;
auto *NNS = NestedNameSpecifier::Create(Context, nullptr, false,
cast<Type>(Context.getRecordType(RD)));
QualType T = Context.getDependentNameType(ETK_Typename, NNS, &II);
CXXScopeSpec SS;
SS.MakeTrivial(Context, NNS, SourceRange(NameLoc));
TypeLocBuilder Builder;
DependentNameTypeLoc DepTL = Builder.push<DependentNameTypeLoc>(T);
DepTL.setNameLoc(NameLoc);
DepTL.setElaboratedKeywordLoc(SourceLocation());
DepTL.setQualifierLoc(SS.getWithLocInContext(Context));
return S.CreateParsedType(T, Builder.getTypeSourceInfo(Context, T));
}
/// Build a ParsedType for a simple-type-specifier with a nested-name-specifier.
static ParsedType buildNamedType(Sema &S, const CXXScopeSpec *SS, QualType T,
SourceLocation NameLoc,
bool WantNontrivialTypeSourceInfo = true) {
switch (T->getTypeClass()) {
case Type::DeducedTemplateSpecialization:
case Type::Enum:
case Type::InjectedClassName:
case Type::Record:
case Type::Typedef:
case Type::UnresolvedUsing:
case Type::Using:
break;
// These can never be qualified so an ElaboratedType node
// would carry no additional meaning.
case Type::ObjCInterface:
case Type::ObjCTypeParam:
case Type::TemplateTypeParm:
return ParsedType::make(T);
default:
llvm_unreachable("Unexpected Type Class");
}
if (!SS || SS->isEmpty())
return ParsedType::make(
S.Context.getElaboratedType(ETK_None, nullptr, T, nullptr));
QualType ElTy = S.getElaboratedType(ETK_None, *SS, T);
if (!WantNontrivialTypeSourceInfo)
return ParsedType::make(ElTy);
TypeLocBuilder Builder;
Builder.pushTypeSpec(T).setNameLoc(NameLoc);
ElaboratedTypeLoc ElabTL = Builder.push<ElaboratedTypeLoc>(ElTy);
ElabTL.setElaboratedKeywordLoc(SourceLocation());
ElabTL.setQualifierLoc(SS->getWithLocInContext(S.Context));
return S.CreateParsedType(ElTy, Builder.getTypeSourceInfo(S.Context, ElTy));
}
/// If the identifier refers to a type name within this scope,
/// return the declaration of that type.
///
/// This routine performs ordinary name lookup of the identifier II
/// within the given scope, with optional C++ scope specifier SS, to
/// determine whether the name refers to a type. If so, returns an
/// opaque pointer (actually a QualType) corresponding to that
/// type. Otherwise, returns NULL.
ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
Scope *S, CXXScopeSpec *SS, bool isClassName,
bool HasTrailingDot, ParsedType ObjectTypePtr,
bool IsCtorOrDtorName,
bool WantNontrivialTypeSourceInfo,
bool IsClassTemplateDeductionContext,
ImplicitTypenameContext AllowImplicitTypename,
IdentifierInfo **CorrectedII) {
// FIXME: Consider allowing this outside C++1z mode as an extension.
bool AllowDeducedTemplate = IsClassTemplateDeductionContext &&
getLangOpts().CPlusPlus17 && !IsCtorOrDtorName &&
!isClassName && !HasTrailingDot;
// Determine where we will perform name lookup.
DeclContext *LookupCtx = nullptr;
if (ObjectTypePtr) {
QualType ObjectType = ObjectTypePtr.get();
if (ObjectType->isRecordType())
LookupCtx = computeDeclContext(ObjectType);
} else if (SS && SS->isNotEmpty()) {
LookupCtx = computeDeclContext(*SS, false);
if (!LookupCtx) {
if (isDependentScopeSpecifier(*SS)) {
// C++ [temp.res]p3:
// A qualified-id that refers to a type and in which the
// nested-name-specifier depends on a template-parameter (14.6.2)
// shall be prefixed by the keyword typename to indicate that the
// qualified-id denotes a type, forming an
// elaborated-type-specifier (7.1.5.3).
//
// We therefore do not perform any name lookup if the result would
// refer to a member of an unknown specialization.
// In C++2a, in several contexts a 'typename' is not required. Also
// allow this as an extension.
if (AllowImplicitTypename == ImplicitTypenameContext::No &&
!isClassName && !IsCtorOrDtorName)
return nullptr;
bool IsImplicitTypename = !isClassName && !IsCtorOrDtorName;
if (IsImplicitTypename) {
SourceLocation QualifiedLoc = SS->getRange().getBegin();
if (getLangOpts().CPlusPlus20)
Diag(QualifiedLoc, diag::warn_cxx17_compat_implicit_typename);
else
Diag(QualifiedLoc, diag::ext_implicit_typename)
<< SS->getScopeRep() << II.getName()
<< FixItHint::CreateInsertion(QualifiedLoc, "typename ");
}
// We know from the grammar that this name refers to a type,
// so build a dependent node to describe the type.
if (WantNontrivialTypeSourceInfo)
return ActOnTypenameType(S, SourceLocation(), *SS, II, NameLoc,
(ImplicitTypenameContext)IsImplicitTypename)
.get();
NestedNameSpecifierLoc QualifierLoc = SS->getWithLocInContext(Context);
QualType T =
CheckTypenameType(IsImplicitTypename ? ETK_Typename : ETK_None,
SourceLocation(), QualifierLoc, II, NameLoc);
return ParsedType::make(T);
}
return nullptr;
}
if (!LookupCtx->isDependentContext() &&
RequireCompleteDeclContext(*SS, LookupCtx))
return nullptr;
}
// FIXME: LookupNestedNameSpecifierName isn't the right kind of
// lookup for class-names.
LookupNameKind Kind = isClassName ? LookupNestedNameSpecifierName :
LookupOrdinaryName;
LookupResult Result(*this, &II, NameLoc, Kind);
if (LookupCtx) {
// Perform "qualified" name lookup into the declaration context we
// computed, which is either the type of the base of a member access
// expression or the declaration context associated with a prior
// nested-name-specifier.
LookupQualifiedName(Result, LookupCtx);
if (ObjectTypePtr && Result.empty()) {
// C++ [basic.lookup.classref]p3:
// If the unqualified-id is ~type-name, the type-name is looked up
// in the context of the entire postfix-expression. If the type T of
// the object expression is of a class type C, the type-name is also
// looked up in the scope of class C. At least one of the lookups shall
// find a name that refers to (possibly cv-qualified) T.
LookupName(Result, S);
}
} else {
// Perform unqualified name lookup.
LookupName(Result, S);
// For unqualified lookup in a class template in MSVC mode, look into
// dependent base classes where the primary class template is known.
if (Result.empty() && getLangOpts().MSVCCompat && (!SS || SS->isEmpty())) {
if (ParsedType TypeInBase =
recoverFromTypeInKnownDependentBase(*this, II, NameLoc))
return TypeInBase;
}
}
NamedDecl *IIDecl = nullptr;
UsingShadowDecl *FoundUsingShadow = nullptr;
switch (Result.getResultKind()) {
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
if (CorrectedII) {
TypeNameValidatorCCC CCC(/*AllowInvalid=*/true, isClassName,
AllowDeducedTemplate);
TypoCorrection Correction = CorrectTypo(Result.getLookupNameInfo(), Kind,
S, SS, CCC, CTK_ErrorRecovery);
IdentifierInfo *NewII = Correction.getCorrectionAsIdentifierInfo();
TemplateTy Template;
bool MemberOfUnknownSpecialization;
UnqualifiedId TemplateName;
TemplateName.setIdentifier(NewII, NameLoc);
NestedNameSpecifier *NNS = Correction.getCorrectionSpecifier();
CXXScopeSpec NewSS, *NewSSPtr = SS;
if (SS && NNS) {
NewSS.MakeTrivial(Context, NNS, SourceRange(NameLoc));
NewSSPtr = &NewSS;
}
if (Correction && (NNS || NewII != &II) &&
// Ignore a correction to a template type as the to-be-corrected
// identifier is not a template (typo correction for template names
// is handled elsewhere).
!(getLangOpts().CPlusPlus && NewSSPtr &&
isTemplateName(S, *NewSSPtr, false, TemplateName, nullptr, false,
Template, MemberOfUnknownSpecialization))) {
ParsedType Ty = getTypeName(*NewII, NameLoc, S, NewSSPtr,
isClassName, HasTrailingDot, ObjectTypePtr,
IsCtorOrDtorName,
WantNontrivialTypeSourceInfo,
IsClassTemplateDeductionContext);
if (Ty) {
diagnoseTypo(Correction,
PDiag(diag::err_unknown_type_or_class_name_suggest)
<< Result.getLookupName() << isClassName);
if (SS && NNS)
SS->MakeTrivial(Context, NNS, SourceRange(NameLoc));
*CorrectedII = NewII;
return Ty;
}
}
}
// If typo correction failed or was not performed, fall through
[[fallthrough]];
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue:
Result.suppressDiagnostics();
return nullptr;
case LookupResult::Ambiguous:
// Recover from type-hiding ambiguities by hiding the type. We'll
// do the lookup again when looking for an object, and we can
// diagnose the error then. If we don't do this, then the error
// about hiding the type will be immediately followed by an error
// that only makes sense if the identifier was treated like a type.
if (Result.getAmbiguityKind() == LookupResult::AmbiguousTagHiding) {
Result.suppressDiagnostics();
return nullptr;
}
// Look to see if we have a type anywhere in the list of results.
for (LookupResult::iterator Res = Result.begin(), ResEnd = Result.end();
Res != ResEnd; ++Res) {
NamedDecl *RealRes = (*Res)->getUnderlyingDecl();
if (isa<TypeDecl, ObjCInterfaceDecl, UnresolvedUsingIfExistsDecl>(
RealRes) ||
(AllowDeducedTemplate && getAsTypeTemplateDecl(RealRes))) {
if (!IIDecl ||
// Make the selection of the recovery decl deterministic.
RealRes->getLocation() < IIDecl->getLocation()) {
IIDecl = RealRes;
FoundUsingShadow = dyn_cast<UsingShadowDecl>(*Res);
}
}
}
if (!IIDecl) {
// None of the entities we found is a type, so there is no way
// to even assume that the result is a type. In this case, don't
// complain about the ambiguity. The parser will either try to
// perform this lookup again (e.g., as an object name), which
// will produce the ambiguity, or will complain that it expected
// a type name.
Result.suppressDiagnostics();
return nullptr;
}
// We found a type within the ambiguous lookup; diagnose the
// ambiguity and then return that type. This might be the right
// answer, or it might not be, but it suppresses any attempt to
// perform the name lookup again.
break;
case LookupResult::Found:
IIDecl = Result.getFoundDecl();
FoundUsingShadow = dyn_cast<UsingShadowDecl>(*Result.begin());
break;
}
assert(IIDecl && "Didn't find decl");
QualType T;
if (TypeDecl *TD = dyn_cast<TypeDecl>(IIDecl)) {
// C++ [class.qual]p2: A lookup that would find the injected-class-name
// instead names the constructors of the class, except when naming a class.
// This is ill-formed when we're not actually forming a ctor or dtor name.
auto *LookupRD = dyn_cast_or_null<CXXRecordDecl>(LookupCtx);
auto *FoundRD = dyn_cast<CXXRecordDecl>(TD);
if (!isClassName && !IsCtorOrDtorName && LookupRD && FoundRD &&
FoundRD->isInjectedClassName() &&
declaresSameEntity(LookupRD, cast<Decl>(FoundRD->getParent())))
Diag(NameLoc, diag::err_out_of_line_qualified_id_type_names_constructor)
<< &II << /*Type*/1;
DiagnoseUseOfDecl(IIDecl, NameLoc);
T = Context.getTypeDeclType(TD);
MarkAnyDeclReferenced(TD->getLocation(), TD, /*OdrUse=*/false);
} else if (ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(IIDecl)) {
(void)DiagnoseUseOfDecl(IDecl, NameLoc);
if (!HasTrailingDot)
T = Context.getObjCInterfaceType(IDecl);
FoundUsingShadow = nullptr; // FIXME: Target must be a TypeDecl.
} else if (auto *UD = dyn_cast<UnresolvedUsingIfExistsDecl>(IIDecl)) {
(void)DiagnoseUseOfDecl(UD, NameLoc);
// Recover with 'int'
return ParsedType::make(Context.IntTy);
} else if (AllowDeducedTemplate) {
if (auto *TD = getAsTypeTemplateDecl(IIDecl)) {
assert(!FoundUsingShadow || FoundUsingShadow->getTargetDecl() == TD);
TemplateName Template =
FoundUsingShadow ? TemplateName(FoundUsingShadow) : TemplateName(TD);
T = Context.getDeducedTemplateSpecializationType(Template, QualType(),
false);
// Don't wrap in a further UsingType.
FoundUsingShadow = nullptr;
}
}
if (T.isNull()) {
// If it's not plausibly a type, suppress diagnostics.
Result.suppressDiagnostics();
return nullptr;
}
if (FoundUsingShadow)
T = Context.getUsingType(FoundUsingShadow, T);
return buildNamedType(*this, SS, T, NameLoc, WantNontrivialTypeSourceInfo);
}
// Builds a fake NNS for the given decl context.
static NestedNameSpecifier *
synthesizeCurrentNestedNameSpecifier(ASTContext &Context, DeclContext *DC) {
for (;; DC = DC->getLookupParent()) {
DC = DC->getPrimaryContext();
auto *ND = dyn_cast<NamespaceDecl>(DC);
if (ND && !ND->isInline() && !ND->isAnonymousNamespace())
return NestedNameSpecifier::Create(Context, nullptr, ND);
else if (auto *RD = dyn_cast<CXXRecordDecl>(DC))
return NestedNameSpecifier::Create(Context, nullptr, RD->isTemplateDecl(),
RD->getTypeForDecl());
else if (isa<TranslationUnitDecl>(DC))
return NestedNameSpecifier::GlobalSpecifier(Context);
}
llvm_unreachable("something isn't in TU scope?");
}
/// Find the parent class with dependent bases of the innermost enclosing method
/// context. Do not look for enclosing CXXRecordDecls directly, or we will end
/// up allowing unqualified dependent type names at class-level, which MSVC
/// correctly rejects.
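///
/// A sketch of the MSVC-compatible pattern this enables (the names here are
/// illustrative):
/// @code
/// template <typename T> struct Base { typedef int UnknownType; };
/// template <typename T> struct Derived : Base<T> {
///   void f() { UnknownType X; } // looked up in Base<T> at instantiation time
/// };
/// @endcode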
static const CXXRecordDecl *
findRecordWithDependentBasesOfEnclosingMethod(const DeclContext *DC) {
for (; DC && DC->isDependentContext(); DC = DC->getLookupParent()) {
DC = DC->getPrimaryContext();
if (const auto *MD = dyn_cast<CXXMethodDecl>(DC))
if (MD->getParent()->hasAnyDependentBases())
return MD->getParent();
}
return nullptr;
}
ParsedType Sema::ActOnMSVCUnknownTypeName(const IdentifierInfo &II,
SourceLocation NameLoc,
bool IsTemplateTypeArg) {
assert(getLangOpts().MSVCCompat && "shouldn't be called in non-MSVC mode");
NestedNameSpecifier *NNS = nullptr;
if (IsTemplateTypeArg && getCurScope()->isTemplateParamScope()) {
// If we weren't able to parse a default template argument, delay lookup
// until instantiation time by making a non-dependent DependentNameType. We
// pretend we saw a NestedNameSpecifier referring to the current scope, and
// lookup is retried.
// FIXME: This hurts our diagnostic quality, since we get errors like "no
// type named 'Foo' in 'current_namespace'" when the user didn't write any
// name specifiers.
NNS = synthesizeCurrentNestedNameSpecifier(Context, CurContext);
Diag(NameLoc, diag::ext_ms_delayed_template_argument) << &II;
} else if (const CXXRecordDecl *RD =
findRecordWithDependentBasesOfEnclosingMethod(CurContext)) {
// Build a DependentNameType that will perform lookup into RD at
// instantiation time.
NNS = NestedNameSpecifier::Create(Context, nullptr, RD->isTemplateDecl(),
RD->getTypeForDecl());
// Diagnose that this identifier was undeclared, and retry the lookup during
// template instantiation.
Diag(NameLoc, diag::ext_undeclared_unqual_id_with_dependent_base) << &II
<< RD;
} else {
// This is not a situation that we should recover from.
return ParsedType();
}
QualType T = Context.getDependentNameType(ETK_None, NNS, &II);
// Build type location information. We synthesized the qualifier, so we have
// to build a fake NestedNameSpecifierLoc.
NestedNameSpecifierLocBuilder NNSLocBuilder;
NNSLocBuilder.MakeTrivial(Context, NNS, SourceRange(NameLoc));
NestedNameSpecifierLoc QualifierLoc = NNSLocBuilder.getWithLocInContext(Context);
TypeLocBuilder Builder;
DependentNameTypeLoc DepTL = Builder.push<DependentNameTypeLoc>(T);
DepTL.setNameLoc(NameLoc);
DepTL.setElaboratedKeywordLoc(SourceLocation());
DepTL.setQualifierLoc(QualifierLoc);
return CreateParsedType(T, Builder.getTypeSourceInfo(Context, T));
}
/// isTagName() - This method is called *for error recovery purposes only*
/// to determine if the specified name is a valid tag name ("struct foo"). If
/// so, this returns the TST for the tag corresponding to it (TST_enum,
/// TST_union, TST_struct, TST_interface, TST_class). This is used to diagnose
/// cases in C where the user forgot to specify the tag.
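/// For example, in C:
/// @code
/// struct S { int X; };
/// S *P; // recovered as if the user had written 'struct S *P;'
/// @endcode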
DeclSpec::TST Sema::isTagName(IdentifierInfo &II, Scope *S) {
// Do a tag name lookup in this scope.
LookupResult R(*this, &II, SourceLocation(), LookupTagName);
LookupName(R, S, false);
R.suppressDiagnostics();
if (R.getResultKind() == LookupResult::Found)
if (const TagDecl *TD = R.getAsSingle<TagDecl>()) {
switch (TD->getTagKind()) {
case TTK_Struct: return DeclSpec::TST_struct;
case TTK_Interface: return DeclSpec::TST_interface;
case TTK_Union: return DeclSpec::TST_union;
case TTK_Class: return DeclSpec::TST_class;
case TTK_Enum: return DeclSpec::TST_enum;
}
}
return DeclSpec::TST_unspecified;
}
/// isMicrosoftMissingTypename - In Microsoft mode, within class scope,
/// if a CXXScopeSpec's type is equal to the type of one of the base classes
/// then downgrade the missing typename error to a warning.
/// This is needed for MSVC compatibility; Example:
/// @code
/// template<class T> class A {
/// public:
/// typedef int TYPE;
/// };
/// template<class T> class B : public A<T> {
/// public:
/// A<T>::TYPE a; // no typename required because A<T> is a base class.
/// };
/// @endcode
bool Sema::isMicrosoftMissingTypename(const CXXScopeSpec *SS, Scope *S) {
if (CurContext->isRecord()) {
if (SS->getScopeRep()->getKind() == NestedNameSpecifier::Super)
return true;
const Type *Ty = SS->getScopeRep()->getAsType();
CXXRecordDecl *RD = cast<CXXRecordDecl>(CurContext);
for (const auto &Base : RD->bases())
if (Ty && Context.hasSameUnqualifiedType(QualType(Ty, 1), Base.getType()))
return true;
return S->isFunctionPrototypeScope();
}
return CurContext->isFunctionOrMethod() || S->isFunctionPrototypeScope();
}
void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
SourceLocation IILoc,
Scope *S,
CXXScopeSpec *SS,
ParsedType &SuggestedType,
bool IsTemplateName) {
// Don't report typename errors for editor placeholders.
if (II->isEditorPlaceholder())
return;
// We don't have anything to suggest (yet).
SuggestedType = nullptr;
// There may have been a typo in the name of the type. Look up typo
// results, in case we have something that we can suggest.
TypeNameValidatorCCC CCC(/*AllowInvalid=*/false, /*WantClass=*/false,
/*AllowTemplates=*/IsTemplateName,
/*AllowNonTemplates=*/!IsTemplateName);
if (TypoCorrection Corrected =
CorrectTypo(DeclarationNameInfo(II, IILoc), LookupOrdinaryName, S, SS,
CCC, CTK_ErrorRecovery)) {
// FIXME: Support error recovery for the template-name case.
bool CanRecover = !IsTemplateName;
if (Corrected.isKeyword()) {
// We corrected to a keyword.
diagnoseTypo(Corrected,
PDiag(IsTemplateName ? diag::err_no_template_suggest
: diag::err_unknown_typename_suggest)
<< II);
II = Corrected.getCorrectionAsIdentifierInfo();
} else {
// We found a similarly-named type or interface; suggest that.
if (!SS || !SS->isSet()) {
diagnoseTypo(Corrected,
PDiag(IsTemplateName ? diag::err_no_template_suggest
: diag::err_unknown_typename_suggest)
<< II, CanRecover);
} else if (DeclContext *DC = computeDeclContext(*SS, false)) {
std::string CorrectedStr(Corrected.getAsString(getLangOpts()));
bool DroppedSpecifier = Corrected.WillReplaceSpecifier() &&
II->getName().equals(CorrectedStr);
diagnoseTypo(Corrected,
PDiag(IsTemplateName
? diag::err_no_member_template_suggest
: diag::err_unknown_nested_typename_suggest)
<< II << DC << DroppedSpecifier << SS->getRange(),
CanRecover);
} else {
llvm_unreachable("could not have corrected a typo here");
}
if (!CanRecover)
return;
CXXScopeSpec tmpSS;
if (Corrected.getCorrectionSpecifier())
tmpSS.MakeTrivial(Context, Corrected.getCorrectionSpecifier(),
SourceRange(IILoc));
// FIXME: Support class template argument deduction here.
SuggestedType =
getTypeName(*Corrected.getCorrectionAsIdentifierInfo(), IILoc, S,
tmpSS.isSet() ? &tmpSS : SS, false, false, nullptr,
/*IsCtorOrDtorName=*/false,
/*WantNontrivialTypeSourceInfo=*/true);
}
return;
}
if (getLangOpts().CPlusPlus && !IsTemplateName) {
// See if II is a class template that the user forgot to pass arguments to.
UnqualifiedId Name;
Name.setIdentifier(II, IILoc);
CXXScopeSpec EmptySS;
TemplateTy TemplateResult;
bool MemberOfUnknownSpecialization;
if (isTemplateName(S, SS ? *SS : EmptySS, /*hasTemplateKeyword=*/false,
Name, nullptr, true, TemplateResult,
MemberOfUnknownSpecialization) == TNK_Type_template) {
diagnoseMissingTemplateArguments(TemplateResult.get(), IILoc);
return;
}
}
// FIXME: Should we move the logic that tries to recover from a missing tag
// (struct, union, enum) from Parser::ParseImplicitInt here, instead?
if (!SS || (!SS->isSet() && !SS->isInvalid()))
Diag(IILoc, IsTemplateName ? diag::err_no_template
: diag::err_unknown_typename)
<< II;
else if (DeclContext *DC = computeDeclContext(*SS, false))
Diag(IILoc, IsTemplateName ? diag::err_no_member_template
: diag::err_typename_nested_not_found)
<< II << DC << SS->getRange();
else if (SS->isValid() && SS->getScopeRep()->containsErrors()) {
SuggestedType =
ActOnTypenameType(S, SourceLocation(), *SS, *II, IILoc).get();
} else if (isDependentScopeSpecifier(*SS)) {
unsigned DiagID = diag::err_typename_missing;
if (getLangOpts().MSVCCompat && isMicrosoftMissingTypename(SS, S))
DiagID = diag::ext_typename_missing;
Diag(SS->getRange().getBegin(), DiagID)
<< SS->getScopeRep() << II->getName()
<< SourceRange(SS->getRange().getBegin(), IILoc)
<< FixItHint::CreateInsertion(SS->getRange().getBegin(), "typename ");
SuggestedType = ActOnTypenameType(S, SourceLocation(),
*SS, *II, IILoc).get();
} else {
assert(SS && SS->isInvalid() &&
"Invalid scope specifier has already been diagnosed");
}
}
/// Determine whether the given result set contains either a type name or a
/// template name that might name a type (a template name followed by '<' in
/// C++).
static bool isResultTypeOrTemplate(LookupResult &R, const Token &NextToken) {
bool CheckTemplate = R.getSema().getLangOpts().CPlusPlus &&
NextToken.is(tok::less);
for (LookupResult::iterator I = R.begin(), IEnd = R.end(); I != IEnd; ++I) {
if (isa<TypeDecl>(*I) || isa<ObjCInterfaceDecl>(*I))
return true;
if (CheckTemplate && isa<TemplateDecl>(*I))
return true;
}
return false;
}
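/// Check whether a name that ordinary lookup found as a non-type also names
/// a tag type, which suggests the user forgot the tag keyword. A sketch of
/// the C pattern this recovers from:
/// @code
/// struct stat { int Mode; };
/// int stat(const char *Path, struct stat *Buf);
/// stat S; // diagnosed; the fix-it inserts 'struct ' before 'stat'
/// @endcode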
static bool isTagTypeWithMissingTag(Sema &SemaRef, LookupResult &Result,
Scope *S, CXXScopeSpec &SS,
IdentifierInfo *&Name,
SourceLocation NameLoc) {
LookupResult R(SemaRef, Name, NameLoc, Sema::LookupTagName);
SemaRef.LookupParsedName(R, S, &SS);
if (TagDecl *Tag = R.getAsSingle<TagDecl>()) {
StringRef FixItTagName;
switch (Tag->getTagKind()) {
case TTK_Class:
FixItTagName = "class ";
break;
case TTK_Enum:
FixItTagName = "enum ";
break;
case TTK_Struct:
FixItTagName = "struct ";
break;
case TTK_Interface:
FixItTagName = "__interface ";
break;
case TTK_Union:
FixItTagName = "union ";
break;
}
StringRef TagName = FixItTagName.drop_back();
SemaRef.Diag(NameLoc, diag::err_use_of_tag_name_without_tag)
<< Name << TagName << SemaRef.getLangOpts().CPlusPlus
<< FixItHint::CreateInsertion(NameLoc, FixItTagName);
for (LookupResult::iterator I = Result.begin(), IEnd = Result.end();
I != IEnd; ++I)
SemaRef.Diag((*I)->getLocation(), diag::note_decl_hiding_tag_type)
<< Name << TagName;
// Replace lookup results with just the tag decl.
Result.clear(Sema::LookupTagName);
SemaRef.LookupParsedName(Result, S, &SS);
return true;
}
return false;
}
Sema::NameClassification Sema::ClassifyName(Scope *S, CXXScopeSpec &SS,
IdentifierInfo *&Name,
SourceLocation NameLoc,
const Token &NextToken,
CorrectionCandidateCallback *CCC) {
DeclarationNameInfo NameInfo(Name, NameLoc);
ObjCMethodDecl *CurMethod = getCurMethodDecl();
assert(NextToken.isNot(tok::coloncolon) &&
"parse nested name specifiers before calling ClassifyName");
if (getLangOpts().CPlusPlus && SS.isSet() &&
isCurrentClassName(*Name, S, &SS)) {
// Per [class.qual]p2, this names the constructors of SS, not the
// injected-class-name. We don't have a classification for that.
// There's not much point caching this result, since the parser
// will reject it later.
return NameClassification::Unknown();
}
LookupResult Result(*this, Name, NameLoc, LookupOrdinaryName);
LookupParsedName(Result, S, &SS, !CurMethod);
if (SS.isInvalid())
return NameClassification::Error();
// For unqualified lookup in a class template in MSVC mode, look into
// dependent base classes where the primary class template is known.
if (Result.empty() && SS.isEmpty() && getLangOpts().MSVCCompat) {
if (ParsedType TypeInBase =
recoverFromTypeInKnownDependentBase(*this, *Name, NameLoc))
return TypeInBase;
}
// Perform lookup for Objective-C instance variables (including automatically
// synthesized instance variables), if we're in an Objective-C method.
// FIXME: This lookup really, really needs to be folded in to the normal
// unqualified lookup mechanism.
if (SS.isEmpty() && CurMethod && !isResultTypeOrTemplate(Result, NextToken)) {
DeclResult Ivar = LookupIvarInObjCMethod(Result, S, Name);
if (Ivar.isInvalid())
return NameClassification::Error();
if (Ivar.isUsable())
return NameClassification::NonType(cast<NamedDecl>(Ivar.get()));
// We defer builtin creation until after ivar lookup inside ObjC methods.
if (Result.empty())
LookupBuiltin(Result);
}
bool SecondTry = false;
bool IsFilteredTemplateName = false;
Corrected:
switch (Result.getResultKind()) {
case LookupResult::NotFound:
// If an unqualified-id is followed by a '(', then we have a function
// call.
if (SS.isEmpty() && NextToken.is(tok::l_paren)) {
// In C++, this is an ADL-only call.
// FIXME: Reference?
if (getLangOpts().CPlusPlus)
return NameClassification::UndeclaredNonType();
// C90 6.3.2.2:
// If the expression that precedes the parenthesized argument list in a
// function call consists solely of an identifier, and if no
// declaration is visible for this identifier, the identifier is
// implicitly declared exactly as if, in the innermost block containing
// the function call, the declaration
//
// extern int identifier ();
//
// appeared.
//
// We also allow this in C99 as an extension. However, this is not
// allowed in all language modes as functions without prototypes may not
// be supported.
if (getLangOpts().implicitFunctionsAllowed()) {
if (NamedDecl *D = ImplicitlyDefineFunction(NameLoc, *Name, S))
return NameClassification::NonType(D);
}
}
if (getLangOpts().CPlusPlus20 && SS.isEmpty() && NextToken.is(tok::less)) {
// In C++20 onwards, this could be an ADL-only call to a function
// template, and we're required to assume that this is a template name.
//
// FIXME: Find a way to still do typo correction in this case.
TemplateName Template =
Context.getAssumedTemplateName(NameInfo.getName());
return NameClassification::UndeclaredTemplate(Template);
}
// In C, we first see whether there is a tag type by the same name, in
// which case it's likely that the user just forgot to write "enum",
// "struct", or "union".
if (!getLangOpts().CPlusPlus && !SecondTry &&
isTagTypeWithMissingTag(*this, Result, S, SS, Name, NameLoc)) {
break;
}
// Perform typo correction to determine if there is another name that is
// close to this name.
if (!SecondTry && CCC) {
SecondTry = true;
if (TypoCorrection Corrected =
CorrectTypo(Result.getLookupNameInfo(), Result.getLookupKind(), S,
&SS, *CCC, CTK_ErrorRecovery)) {
unsigned UnqualifiedDiag = diag::err_undeclared_var_use_suggest;
unsigned QualifiedDiag = diag::err_no_member_suggest;
NamedDecl *FirstDecl = Corrected.getFoundDecl();
NamedDecl *UnderlyingFirstDecl = Corrected.getCorrectionDecl();
if (getLangOpts().CPlusPlus && NextToken.is(tok::less) &&
UnderlyingFirstDecl && isa<TemplateDecl>(UnderlyingFirstDecl)) {
UnqualifiedDiag = diag::err_no_template_suggest;
QualifiedDiag = diag::err_no_member_template_suggest;
} else if (UnderlyingFirstDecl &&
(isa<TypeDecl>(UnderlyingFirstDecl) ||
isa<ObjCInterfaceDecl>(UnderlyingFirstDecl) ||
isa<ObjCCompatibleAliasDecl>(UnderlyingFirstDecl))) {
UnqualifiedDiag = diag::err_unknown_typename_suggest;
QualifiedDiag = diag::err_unknown_nested_typename_suggest;
}
if (SS.isEmpty()) {
diagnoseTypo(Corrected, PDiag(UnqualifiedDiag) << Name);
} else { // FIXME: is this even reachable? Test it.
std::string CorrectedStr(Corrected.getAsString(getLangOpts()));
bool DroppedSpecifier = Corrected.WillReplaceSpecifier() &&
Name->getName().equals(CorrectedStr);
diagnoseTypo(Corrected, PDiag(QualifiedDiag)
<< Name << computeDeclContext(SS, false)
<< DroppedSpecifier << SS.getRange());
}
// Update the name, so that the caller has the new name.
Name = Corrected.getCorrectionAsIdentifierInfo();
// Typo correction corrected to a keyword.
if (Corrected.isKeyword())
return Name;
// Also update the LookupResult...
// FIXME: This should probably go away at some point
Result.clear();
Result.setLookupName(Corrected.getCorrection());
if (FirstDecl)
Result.addDecl(FirstDecl);
// If we found an Objective-C instance variable, let
// LookupInObjCMethod build the appropriate expression to
// reference the ivar.
// FIXME: This is a gross hack.
if (ObjCIvarDecl *Ivar = Result.getAsSingle<ObjCIvarDecl>()) {
DeclResult R =
LookupIvarInObjCMethod(Result, S, Ivar->getIdentifier());
if (R.isInvalid())
return NameClassification::Error();
if (R.isUsable())
return NameClassification::NonType(Ivar);
}
goto Corrected;
}
}
// We failed to correct; just fall through and let the parser deal with it.
Result.suppressDiagnostics();
return NameClassification::Unknown();
case LookupResult::NotFoundInCurrentInstantiation: {
// We performed name lookup into the current instantiation, and there were
// dependent bases, so we treat this result the same way as any other
// dependent nested-name-specifier.
// C++ [temp.res]p2:
// A name used in a template declaration or definition and that is
// dependent on a template-parameter is assumed not to name a type
// unless the applicable name lookup finds a type name or the name is
// qualified by the keyword typename.
//
// FIXME: If the next token is '<', we might want to ask the parser to
// perform some heroics to see if we actually have a
// template-argument-list, which would indicate a missing 'template'
// keyword here.
return NameClassification::DependentNonType();
}
case LookupResult::Found:
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue:
break;
case LookupResult::Ambiguous:
if (getLangOpts().CPlusPlus && NextToken.is(tok::less) &&
hasAnyAcceptableTemplateNames(Result, /*AllowFunctionTemplates=*/true,
/*AllowDependent=*/false)) {
// C++ [temp.local]p3:
// A lookup that finds an injected-class-name (10.2) can result in an
// ambiguity in certain cases (for example, if it is found in more than
// one base class). If all of the injected-class-names that are found
// refer to specializations of the same class template, and if the name
// is followed by a template-argument-list, the reference refers to the
// class template itself and not a specialization thereof, and is not
// ambiguous.
//
// This filtering can make an ambiguous result into an unambiguous one,
// so try again after filtering out template names.
FilterAcceptableTemplateNames(Result);
if (!Result.isAmbiguous()) {
IsFilteredTemplateName = true;
break;
}
}
// Diagnose the ambiguity and return an error.
return NameClassification::Error();
}
if (getLangOpts().CPlusPlus && NextToken.is(tok::less) &&
(IsFilteredTemplateName ||
hasAnyAcceptableTemplateNames(
Result, /*AllowFunctionTemplates=*/true,
/*AllowDependent=*/false,
/*AllowNonTemplateFunctions*/ SS.isEmpty() &&
getLangOpts().CPlusPlus20))) {
// C++ [temp.names]p3:
// After name lookup (3.4) finds that a name is a template-name or that
// an operator-function-id or a literal- operator-id refers to a set of
// overloaded functions any member of which is a function template if
// this is followed by a <, the < is always taken as the delimiter of a
// template-argument-list and never as the less-than operator.
// C++2a [temp.names]p2:
// A name is also considered to refer to a template if it is an
// unqualified-id followed by a < and name lookup finds either one
// or more functions or finds nothing.
if (!IsFilteredTemplateName)
FilterAcceptableTemplateNames(Result);
bool IsFunctionTemplate;
bool IsVarTemplate;
TemplateName Template;
if (Result.end() - Result.begin() > 1) {
IsFunctionTemplate = true;
Template = Context.getOverloadedTemplateName(Result.begin(),
Result.end());
} else if (!Result.empty()) {
auto *TD = cast<TemplateDecl>(getAsTemplateNameDecl(
*Result.begin(), /*AllowFunctionTemplates=*/true,
/*AllowDependent=*/false));
IsFunctionTemplate = isa<FunctionTemplateDecl>(TD);
IsVarTemplate = isa<VarTemplateDecl>(TD);
UsingShadowDecl *FoundUsingShadow =
dyn_cast<UsingShadowDecl>(*Result.begin());
assert(!FoundUsingShadow ||
TD == cast<TemplateDecl>(FoundUsingShadow->getTargetDecl()));
Template =
FoundUsingShadow ? TemplateName(FoundUsingShadow) : TemplateName(TD);
if (SS.isNotEmpty())
Template = Context.getQualifiedTemplateName(SS.getScopeRep(),
/*TemplateKeyword=*/false,
Template);
} else {
// All results were non-template functions. This is a function template
// name.
IsFunctionTemplate = true;
Template = Context.getAssumedTemplateName(NameInfo.getName());
}
if (IsFunctionTemplate) {
// Function templates always go through overload resolution, at which
// point we'll perform the various checks (e.g., accessibility) we need
// to based on which function we selected.
Result.suppressDiagnostics();
return NameClassification::FunctionTemplate(Template);
}
return IsVarTemplate ? NameClassification::VarTemplate(Template)
: NameClassification::TypeTemplate(Template);
}
auto BuildTypeFor = [&](TypeDecl *Type, NamedDecl *Found) {
QualType T = Context.getTypeDeclType(Type);
if (const auto *USD = dyn_cast<UsingShadowDecl>(Found))
T = Context.getUsingType(USD, T);
return buildNamedType(*this, &SS, T, NameLoc);
};
NamedDecl *FirstDecl = (*Result.begin())->getUnderlyingDecl();
if (TypeDecl *Type = dyn_cast<TypeDecl>(FirstDecl)) {
DiagnoseUseOfDecl(Type, NameLoc);
MarkAnyDeclReferenced(Type->getLocation(), Type, /*OdrUse=*/false);
return BuildTypeFor(Type, *Result.begin());
}
ObjCInterfaceDecl *Class = dyn_cast<ObjCInterfaceDecl>(FirstDecl);
if (!Class) {
// FIXME: It's unfortunate that we don't have a Type node for handling this.
if (ObjCCompatibleAliasDecl *Alias =
dyn_cast<ObjCCompatibleAliasDecl>(FirstDecl))
Class = Alias->getClassInterface();
}
if (Class) {
DiagnoseUseOfDecl(Class, NameLoc);
if (NextToken.is(tok::period)) {
// Interface. <something> is parsed as a property reference expression.
// Just return "unknown" as a fall-through for now.
Result.suppressDiagnostics();
return NameClassification::Unknown();
}
QualType T = Context.getObjCInterfaceType(Class);
return ParsedType::make(T);
}
if (isa<ConceptDecl>(FirstDecl))
return NameClassification::Concept(
TemplateName(cast<TemplateDecl>(FirstDecl)));
if (auto *EmptyD = dyn_cast<UnresolvedUsingIfExistsDecl>(FirstDecl)) {
(void)DiagnoseUseOfDecl(EmptyD, NameLoc);
return NameClassification::Error();
}
// We can have a type template here if we're classifying a template argument.
if (isa<TemplateDecl>(FirstDecl) && !isa<FunctionTemplateDecl>(FirstDecl) &&
!isa<VarTemplateDecl>(FirstDecl))
return NameClassification::TypeTemplate(
TemplateName(cast<TemplateDecl>(FirstDecl)));
// Check for a tag type hidden by a non-type decl in a few cases where it
// seems likely a type is wanted instead of the non-type that was found.
bool NextIsOp = NextToken.isOneOf(tok::amp, tok::star);
if ((NextToken.is(tok::identifier) ||
(NextIsOp &&
FirstDecl->getUnderlyingDecl()->isFunctionOrFunctionTemplate())) &&
isTagTypeWithMissingTag(*this, Result, S, SS, Name, NameLoc)) {
TypeDecl *Type = Result.getAsSingle<TypeDecl>();
DiagnoseUseOfDecl(Type, NameLoc);
return BuildTypeFor(Type, *Result.begin());
}
// If we already know which single declaration is referenced, just annotate
// that declaration directly. Defer resolving even non-overloaded class
// member accesses, as we need to defer certain access checks until we know
// the context.
bool ADL = UseArgumentDependentLookup(SS, Result, NextToken.is(tok::l_paren));
if (Result.isSingleResult() && !ADL &&
(!FirstDecl->isCXXClassMember() || isa<EnumConstantDecl>(FirstDecl)))
return NameClassification::NonType(Result.getRepresentativeDecl());
// Otherwise, this is an overload set that we will need to resolve later.
Result.suppressDiagnostics();
return NameClassification::OverloadSet(UnresolvedLookupExpr::Create(
Context, Result.getNamingClass(), SS.getWithLocInContext(Context),
Result.getLookupNameInfo(), ADL, Result.isOverloadedResult(),
Result.begin(), Result.end()));
}
ExprResult
Sema::ActOnNameClassifiedAsUndeclaredNonType(IdentifierInfo *Name,
SourceLocation NameLoc) {
assert(getLangOpts().CPlusPlus && "ADL-only call in C?");
CXXScopeSpec SS;
LookupResult Result(*this, Name, NameLoc, LookupOrdinaryName);
return BuildDeclarationNameExpr(SS, Result, /*ADL=*/true);
}
ExprResult
Sema::ActOnNameClassifiedAsDependentNonType(const CXXScopeSpec &SS,
IdentifierInfo *Name,
SourceLocation NameLoc,
bool IsAddressOfOperand) {
DeclarationNameInfo NameInfo(Name, NameLoc);
return ActOnDependentIdExpression(SS, /*TemplateKWLoc=*/SourceLocation(),
NameInfo, IsAddressOfOperand,
/*TemplateArgs=*/nullptr);
}
ExprResult Sema::ActOnNameClassifiedAsNonType(Scope *S, const CXXScopeSpec &SS,
NamedDecl *Found,
SourceLocation NameLoc,
const Token &NextToken) {
if (getCurMethodDecl() && SS.isEmpty())
if (auto *Ivar = dyn_cast<ObjCIvarDecl>(Found->getUnderlyingDecl()))
return BuildIvarRefExpr(S, NameLoc, Ivar);
// Reconstruct the lookup result.
LookupResult Result(*this, Found->getDeclName(), NameLoc, LookupOrdinaryName);
Result.addDecl(Found);
Result.resolveKind();
bool ADL = UseArgumentDependentLookup(SS, Result, NextToken.is(tok::l_paren));
return BuildDeclarationNameExpr(SS, Result, ADL, /*AcceptInvalidDecl=*/true);
}
ExprResult Sema::ActOnNameClassifiedAsOverloadSet(Scope *S, Expr *E) {
// For an implicit class member access, transform the result into a member
// access expression if necessary.
auto *ULE = cast<UnresolvedLookupExpr>(E);
if ((*ULE->decls_begin())->isCXXClassMember()) {
CXXScopeSpec SS;
SS.Adopt(ULE->getQualifierLoc());
// Reconstruct the lookup result.
LookupResult Result(*this, ULE->getName(), ULE->getNameLoc(),
LookupOrdinaryName);
Result.setNamingClass(ULE->getNamingClass());
for (auto I = ULE->decls_begin(), E = ULE->decls_end(); I != E; ++I)
Result.addDecl(*I, I.getAccess());
Result.resolveKind();
return BuildPossibleImplicitMemberExpr(SS, SourceLocation(), Result,
nullptr, S);
}
// Otherwise, this is already in the form we needed, and no further checks
// are necessary.
return ULE;
}
Sema::TemplateNameKindForDiagnostics
Sema::getTemplateNameKindForDiagnostics(TemplateName Name) {
auto *TD = Name.getAsTemplateDecl();
if (!TD)
return TemplateNameKindForDiagnostics::DependentTemplate;
if (isa<ClassTemplateDecl>(TD))
return TemplateNameKindForDiagnostics::ClassTemplate;
if (isa<FunctionTemplateDecl>(TD))
return TemplateNameKindForDiagnostics::FunctionTemplate;
if (isa<VarTemplateDecl>(TD))
return TemplateNameKindForDiagnostics::VarTemplate;
if (isa<TypeAliasTemplateDecl>(TD))
return TemplateNameKindForDiagnostics::AliasTemplate;
if (isa<TemplateTemplateParmDecl>(TD))
return TemplateNameKindForDiagnostics::TemplateTemplateParam;
if (isa<ConceptDecl>(TD))
return TemplateNameKindForDiagnostics::Concept;
return TemplateNameKindForDiagnostics::DependentTemplate;
}
void Sema::PushDeclContext(Scope *S, DeclContext *DC) {
assert(DC->getLexicalParent() == CurContext &&
"The next DeclContext should be lexically contained in the current one.");
CurContext = DC;
S->setEntity(DC);
}
void Sema::PopDeclContext() {
assert(CurContext && "DeclContext imbalance!");
CurContext = CurContext->getLexicalParent();
assert(CurContext && "Popped translation unit!");
}
Sema::SkippedDefinitionContext Sema::ActOnTagStartSkippedDefinition(Scope *S,
Decl *D) {
// Unlike PushDeclContext, the context to which we return is not necessarily
// the containing DC of TD, because the new context will be some pre-existing
// TagDecl definition instead of a fresh one.
auto Result = static_cast<SkippedDefinitionContext>(CurContext);
CurContext = cast<TagDecl>(D)->getDefinition();
assert(CurContext && "skipping definition of undefined tag");
// Start lookups from the parent of the current context; we don't want to look
// into the pre-existing complete definition.
S->setEntity(CurContext->getLookupParent());
return Result;
}
void Sema::ActOnTagFinishSkippedDefinition(SkippedDefinitionContext Context) {
CurContext = static_cast<decltype(CurContext)>(Context);
}
/// EnterDeclaratorContext - Used when we must look up names in the context
/// of a declarator's nested name specifier.
///
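/// For example:
/// @code
/// namespace N { extern int X; }
/// int N::X = sizeof(X); // 'X' after the declarator-id is looked up in N
/// @endcode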
void Sema::EnterDeclaratorContext(Scope *S, DeclContext *DC) {
// C++0x [basic.lookup.unqual]p13:
// A name used in the definition of a static data member of class
// X (after the qualified-id of the static member) is looked up as
// if the name was used in a member function of X.
// C++0x [basic.lookup.unqual]p14:
// If a variable member of a namespace is defined outside of the
// scope of its namespace then any name used in the definition of
// the variable member (after the declarator-id) is looked up as
// if the definition of the variable member occurred in its
// namespace.
// Both of these imply that we should push a scope whose context
// is the semantic context of the declaration. We can't use
// PushDeclContext here because that context is not necessarily
// lexically contained in the current context. Fortunately,
// the containing scope should have the appropriate information.
assert(!S->getEntity() && "scope already has entity");
#ifndef NDEBUG
Scope *Ancestor = S->getParent();
while (!Ancestor->getEntity()) Ancestor = Ancestor->getParent();
assert(Ancestor->getEntity() == CurContext && "ancestor context mismatch");
#endif
CurContext = DC;
S->setEntity(DC);
if (S->getParent()->isTemplateParamScope()) {
// Also set the corresponding entities for all immediately-enclosing
// template parameter scopes.
EnterTemplatedContext(S->getParent(), DC);
}
}
void Sema::ExitDeclaratorContext(Scope *S) {
assert(S->getEntity() == CurContext && "Context imbalance!");
// Switch back to the lexical context. The safety of this is
// enforced by an assert in EnterDeclaratorContext.
Scope *Ancestor = S->getParent();
while (!Ancestor->getEntity()) Ancestor = Ancestor->getParent();
CurContext = Ancestor->getEntity();
// We don't need to do anything with the scope, which is going to
// disappear.
}
void Sema::EnterTemplatedContext(Scope *S, DeclContext *DC) {
assert(S->isTemplateParamScope() &&
"expected to be initializing a template parameter scope");
// C++20 [temp.local]p7:
// In the definition of a member of a class template that appears outside
// of the class template definition, the name of a member of the class
// template hides the name of a template-parameter of any enclosing class
// templates (but not a template-parameter of the member if the member is a
// class or function template).
// C++20 [temp.local]p9:
// In the definition of a class template or in the definition of a member
// of such a template that appears outside of the template definition, for
// each non-dependent base class (13.8.2.1), if the name of the base class
// or the name of a member of the base class is the same as the name of a
// template-parameter, the base class name or member name hides the
// template-parameter name (6.4.10).
//
// This means that a template parameter scope should be searched immediately
// after searching the DeclContext for which it is a template parameter
// scope. For example, for
// template<typename T> template<typename U> template<typename V>
// void N::A<T>::B<U>::f(...)
// we search V then B<U> (and base classes) then U then A<T> (and base
// classes) then T then N then ::.
unsigned ScopeDepth = getTemplateDepth(S);
for (; S && S->isTemplateParamScope(); S = S->getParent(), --ScopeDepth) {
DeclContext *SearchDCAfterScope = DC;
for (; DC; DC = DC->getLookupParent()) {
if (const TemplateParameterList *TPL =
cast<Decl>(DC)->getDescribedTemplateParams()) {
unsigned DCDepth = TPL->getDepth() + 1;
if (DCDepth > ScopeDepth)
continue;
if (ScopeDepth == DCDepth)
SearchDCAfterScope = DC = DC->getLookupParent();
break;
}
}
S->setLookupEntity(SearchDCAfterScope);
}
}
void Sema::ActOnReenterFunctionContext(Scope* S, Decl *D) {
// We assume that the caller has already called
// ActOnReenterTemplateScope so getTemplatedDecl() works.
FunctionDecl *FD = D->getAsFunction();
if (!FD)
return;
// Same implementation as PushDeclContext, but enters the context
// from the lexical parent, rather than the top-level class.
assert(CurContext == FD->getLexicalParent() &&
"The next DeclContext should be lexically contained in the current one.");
CurContext = FD;
S->setEntity(CurContext);
for (unsigned P = 0, NumParams = FD->getNumParams(); P < NumParams; ++P) {
ParmVarDecl *Param = FD->getParamDecl(P);
// If the parameter has an identifier, then add it to the scope
if (Param->getIdentifier()) {
S->AddDecl(Param);
IdResolver.AddDecl(Param);
}
}
}
void Sema::ActOnExitFunctionContext() {
// Same implementation as PopDeclContext, but returns to the lexical parent,
// rather than the top-level class.
assert(CurContext && "DeclContext imbalance!");
CurContext = CurContext->getLexicalParent();
assert(CurContext && "Popped translation unit!");
}
/// Determine whether overloading is allowed for a new function
/// declaration considering prior declarations of the same name.
///
/// This routine determines whether overloading is possible, not
/// whether a new declaration actually overloads a previous one.
/// It will return true in C++ (where overloads are always permitted)
/// or, as a C extension, when either the new declaration or a
/// previous one is declared with the 'overloadable' attribute.
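///
/// For example, as a C extension (illustrative names):
/// @code
/// int abs_val(int) __attribute__((overloadable));
/// double abs_val(double) __attribute__((overloadable)); // OK: an overload set
/// @endcode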
static bool AllowOverloadingOfFunction(const LookupResult &Previous,
ASTContext &Context,
const FunctionDecl *New) {
if (Context.getLangOpts().CPlusPlus || New->hasAttr<OverloadableAttr>())
return true;
// Multiversion function declarations are not overloads in the
// usual sense of that term, but lookup will report that an
// overload set was found if more than one multiversion function
// declaration is present for the same name. It is therefore
// inadequate to assume that some prior declaration(s) had
// the overloadable attribute; checking is required. Since one
// declaration is permitted to omit the attribute, it is necessary
// to check at least two; hence the 'any_of' check below. Note that
// the overloadable attribute is implicitly added to declarations
// that were required to have it but did not.
if (Previous.getResultKind() == LookupResult::FoundOverloaded) {
return llvm::any_of(Previous, [](const NamedDecl *ND) {
return ND->hasAttr<OverloadableAttr>();
});
} else if (Previous.getResultKind() == LookupResult::Found)
return Previous.getFoundDecl()->hasAttr<OverloadableAttr>();
return false;
}
/// Add this decl to the scope shadowed decl chains.
void Sema::PushOnScopeChains(NamedDecl *D, Scope *S, bool AddToContext) {
// Move up the scope chain until we find the nearest enclosing
// non-transparent context. The declaration will be introduced into this
// scope.
while (S->getEntity() && S->getEntity()->isTransparentContext())
S = S->getParent();
// Add scoped declarations into their context, so that they can be
// found later. Declarations without a context won't be inserted
// into any context.
if (AddToContext)
CurContext->addDecl(D);
// Out-of-line definitions shouldn't be pushed into scope in C++, unless they
// are function-local declarations.
if (getLangOpts().CPlusPlus && D->isOutOfLine() && !S->getFnParent())
return;
// Template instantiations should also not be pushed into scope.
if (isa<FunctionDecl>(D) &&
cast<FunctionDecl>(D)->isFunctionTemplateSpecialization())
return;
// If this declaration replaces anything in the current scope, remove the old
// declaration from the scope and from the identifier chain.
IdentifierResolver::iterator I = IdResolver.begin(D->getDeclName()),
IEnd = IdResolver.end();
for (; I != IEnd; ++I) {
if (S->isDeclScope(*I) && D->declarationReplaces(*I)) {
S->RemoveDecl(*I);
IdResolver.RemoveDecl(*I);
// Should only need to replace one decl.
break;
}
}
S->AddDecl(D);
if (isa<LabelDecl>(D) && !cast<LabelDecl>(D)->isGnuLocal()) {
// Implicitly-generated labels may end up getting generated in an order that
// isn't strictly lexical, which breaks name lookup. Be careful to insert
// the label at the appropriate place in the identifier chain.
for (I = IdResolver.begin(D->getDeclName()); I != IEnd; ++I) {
DeclContext *IDC = (*I)->getLexicalDeclContext()->getRedeclContext();
if (IDC == CurContext) {
if (!S->isDeclScope(*I))
continue;
} else if (IDC->Encloses(CurContext))
break;
}
IdResolver.InsertDeclAfter(I, D);
} else {
IdResolver.AddDecl(D);
}
warnOnReservedIdentifier(D);
}
bool Sema::isDeclInScope(NamedDecl *D, DeclContext *Ctx, Scope *S,
bool AllowInlineNamespace) {
return IdResolver.isDeclInScope(D, Ctx, S, AllowInlineNamespace);
}
Scope *Sema::getScopeForDeclContext(Scope *S, DeclContext *DC) {
DeclContext *TargetDC = DC->getPrimaryContext();
do {
if (DeclContext *ScopeDC = S->getEntity())
if (ScopeDC->getPrimaryContext() == TargetDC)
return S;
} while ((S = S->getParent()));
return nullptr;
}
static bool isOutOfScopePreviousDeclaration(NamedDecl *,
DeclContext*,
ASTContext&);
/// Filters out lookup results that don't fall within the given scope
/// as determined by isDeclInScope.
void Sema::FilterLookupForScope(LookupResult &R, DeclContext *Ctx, Scope *S,
bool ConsiderLinkage,
bool AllowInlineNamespace) {
LookupResult::Filter F = R.makeFilter();
while (F.hasNext()) {
NamedDecl *D = F.next();
if (isDeclInScope(D, Ctx, S, AllowInlineNamespace))
continue;
if (ConsiderLinkage && isOutOfScopePreviousDeclaration(D, Ctx, Context))
continue;
F.erase();
}
F.done();
}
/// We've determined that \p New is a redeclaration of \p Old. Check that they
/// have compatible owning modules.
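///
/// A sketch of one case this rejects:
/// @code
/// // header.h
/// int f();
/// // m.cppm
/// module;
/// #include "header.h" // f() is attached to the global module
/// export module M;
/// int f();            // error: mismatched owning module
/// @endcode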
bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) {
// [module.interface]p7:
// A declaration is attached to a module as follows:
// - If the declaration is a non-dependent friend declaration that nominates a
// function with a declarator-id that is a qualified-id or template-id or that
// nominates a class other than with an elaborated-type-specifier with neither
// a nested-name-specifier nor a simple-template-id, it is attached to the
// module to which the friend is attached ([basic.link]).
if (New->getFriendObjectKind() &&
Old->getOwningModuleForLinkage() != New->getOwningModuleForLinkage()) {
New->setLocalOwningModule(Old->getOwningModule());
makeMergedDefinitionVisible(New);
return false;
}
Module *NewM = New->getOwningModule();
Module *OldM = Old->getOwningModule();
if (NewM && NewM->isPrivateModule())
NewM = NewM->Parent;
if (OldM && OldM->isPrivateModule())
OldM = OldM->Parent;
if (NewM == OldM)
return false;
// Partitions are part of the module, but a partition could import another
// module, so verify that the PMIs agree.
if (NewM && OldM &&
(NewM->isModulePartition() || OldM->isModulePartition()) &&
NewM->getPrimaryModuleInterfaceName() ==
OldM->getPrimaryModuleInterfaceName())
return false;
bool NewIsModuleInterface = NewM && NewM->isModulePurview();
bool OldIsModuleInterface = OldM && OldM->isModulePurview();
if (NewIsModuleInterface || OldIsModuleInterface) {
// C++ Modules TS [basic.def.odr] 6.2/6.7 [sic]:
// if a declaration of D [...] appears in the purview of a module, all
// other such declarations shall appear in the purview of the same module
Diag(New->getLocation(), diag::err_mismatched_owning_module)
<< New
<< NewIsModuleInterface
<< (NewIsModuleInterface ? NewM->getFullModuleName() : "")
<< OldIsModuleInterface
<< (OldIsModuleInterface ? OldM->getFullModuleName() : "");
Diag(Old->getLocation(), diag::note_previous_declaration);
New->setInvalidDecl();
return true;
}
return false;
}
// [module.interface]p6:
// A redeclaration of an entity X is implicitly exported if X was introduced by
// an exported declaration; otherwise it shall not be exported.
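//
// For example, in a module interface unit:
//   export module M;
//   int f();        // not exported
//   export int f(); // error: a redeclaration of non-exported 'f' cannot be
//                   // exported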
bool Sema::CheckRedeclarationExported(NamedDecl *New, NamedDecl *Old) {
// [module.interface]p1:
// An export-declaration shall inhabit a namespace scope.
//
// So it is meaningless to talk about redeclaration which is not at namespace
// scope.
if (!New->getLexicalDeclContext()
->getNonTransparentContext()
->isFileContext() ||
!Old->getLexicalDeclContext()
->getNonTransparentContext()
->isFileContext())
return false;
bool IsNewExported = New->isInExportDeclContext();
bool IsOldExported = Old->isInExportDeclContext();
// It should be irrelevant if both of them are not exported.
if (!IsNewExported && !IsOldExported)
return false;
if (IsOldExported)
return false;
assert(IsNewExported);
auto Lk = Old->getFormalLinkage();
int S = 0;
if (Lk == Linkage::InternalLinkage)
S = 1;
else if (Lk == Linkage::ModuleLinkage)
S = 2;
Diag(New->getLocation(), diag::err_redeclaration_non_exported) << New << S;
Diag(Old->getLocation(), diag::note_previous_declaration);
return true;
}
// A wrapper function for checking the semantic restrictions of
// a redeclaration within a module.
bool Sema::CheckRedeclarationInModule(NamedDecl *New, NamedDecl *Old) {
if (CheckRedeclarationModuleOwnership(New, Old))
return true;
if (CheckRedeclarationExported(New, Old))
return true;
return false;
}
// Check the redefinition in C++20 Modules.
//
// [basic.def.odr]p14:
// For any definable item D with definitions in multiple translation units,
// - if D is a non-inline non-templated function or variable, or
// - if the definitions in different translation units do not satisfy the
// following requirements,
// the program is ill-formed; a diagnostic is required only if the definable
// item is attached to a named module and a prior definition is reachable at
// the point where a later definition occurs.
// - Each such definition shall not be attached to a named module
// ([module.unit]).
// - Each such definition shall consist of the same sequence of tokens, ...
// ...
//
// Return true if the redefinition is not allowed. Return false otherwise.
bool Sema::IsRedefinitionInModule(const NamedDecl *New,
const NamedDecl *Old) const {
assert(getASTContext().isSameEntity(New, Old) &&
"New and Old are not the same definition, we should diagnostic it "
"immediately instead of checking it.");
assert(const_cast<Sema *>(this)->isReachable(New) &&
const_cast<Sema *>(this)->isReachable(Old) &&
"We shouldn't see unreachable definitions here.");
Module *NewM = New->getOwningModule();
Module *OldM = Old->getOwningModule();
// We only check for named modules here; header-like modules are skipped.
// FIXME: This is not right if we import header-like modules in the module
// purview.
//
// For example, assuming "header.h" provides definition for `D`.
// ```C++
// //--- M.cppm
// export module M;
// import "header.h"; // or #include "header.h" but import it by clang modules
// actually.
//
// //--- Use.cpp
// import M;
// import "header.h"; // or uses clang modules.
// ```
//
// In this case, `D` has multiple definitions in multiple TUs (M.cppm and
// Use.cpp) and `D` is attached to a named module `M`. The compiler should
// reject it, but the current implementation cannot detect this case since
// we don't record information about the imported modules.
//
// This might not be painful in practice, though, since the design of C++20
// Named Modules encourages using headers in the global module fragment
// rather than in the module purview.
if (NewM && NewM->isHeaderLikeModule())
NewM = nullptr;
if (OldM && OldM->isHeaderLikeModule())
OldM = nullptr;
if (!NewM && !OldM)
return true;
// [basic.def.odr]p14.3
// Each such definition shall not be attached to a named module
// ([module.unit]).
if ((NewM && NewM->isModulePurview()) || (OldM && OldM->isModulePurview()))
return true;
// Then New and Old live in the same TU if they share the same top-level
// module unit.
if (NewM)
NewM = NewM->getTopLevelModule();
if (OldM)
OldM = OldM->getTopLevelModule();
return OldM == NewM;
}
static bool isUsingDecl(NamedDecl *D) {
return isa<UsingShadowDecl>(D) ||
isa<UnresolvedUsingTypenameDecl>(D) ||
isa<UnresolvedUsingValueDecl>(D);
}
/// Removes using shadow declarations from the lookup results.
static void RemoveUsingDecls(LookupResult &R) {
LookupResult::Filter F = R.makeFilter();
while (F.hasNext())
if (isUsingDecl(F.next()))
F.erase();
F.done();
}
/// Check for this common pattern:
/// @code
/// class S {
/// S(const S&); // DO NOT IMPLEMENT
/// void operator=(const S&); // DO NOT IMPLEMENT
/// };
/// @endcode
static bool IsDisallowedCopyOrAssign(const CXXMethodDecl *D) {
// FIXME: Should check for private access too but access is set after we get
// the decl here.
if (D->doesThisDeclarationHaveABody())
return false;
if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(D))
return CD->isCopyConstructor();
return D->isCopyAssignmentOperator();
}
// We need this to handle
//
// typedef struct {
// void *foo() { return 0; }
// } A;
//
// When we see foo we don't know if after the typedef we will get 'A' or '*A'
// for example. If 'A', foo will have external linkage. If we have '*A',
// foo will have no linkage. Since we can't know until we get to the end
// of the typedef, this function finds out if D might have non-external linkage.
// Callers should verify at the end of the TU whether D has external linkage
// or not.
bool Sema::mightHaveNonExternalLinkage(const DeclaratorDecl *D) {
const DeclContext *DC = D->getDeclContext();
while (!DC->isTranslationUnit()) {
if (const RecordDecl *RD = dyn_cast<RecordDecl>(DC)) {
if (!RD->hasNameForLinkage())
return true;
}
DC = DC->getParent();
}
return !D->isExternallyVisible();
}
// FIXME: This needs to be refactored; some other isInMainFile users want
// these semantics.
static bool isMainFileLoc(const Sema &S, SourceLocation Loc) {
if (S.TUKind != TU_Complete || S.getLangOpts().IsHeaderFile)
return false;
return S.SourceMgr.isInMainFile(Loc);
}
bool Sema::ShouldWarnIfUnusedFileScopedDecl(const DeclaratorDecl *D) const {
assert(D);
if (D->isInvalidDecl() || D->isUsed() || D->hasAttr<UnusedAttr>())
return false;
// Ignore all entities declared within templates, and out-of-line definitions
// of members of class templates.
if (D->getDeclContext()->isDependentContext() ||
D->getLexicalDeclContext()->isDependentContext())
return false;
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return false;
// A non-out-of-line declaration of a member specialization was implicitly
// instantiated; it's the out-of-line declaration that we're interested in.
if (FD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&
FD->getMemberSpecializationInfo() && !FD->isOutOfLine())
return false;
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD)) {
if (MD->isVirtual() || IsDisallowedCopyOrAssign(MD))
return false;
} else {
// 'static inline' functions are defined in headers; don't warn.
if (FD->isInlined() && !isMainFileLoc(*this, FD->getLocation()))
return false;
}
if (FD->doesThisDeclarationHaveABody() &&
Context.DeclMustBeEmitted(FD))
return false;
} else if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
// Constants and utility variables are defined in headers with internal
// linkage; don't warn. (Unlike functions, there isn't a convenient marker
// like "inline".)
if (!isMainFileLoc(*this, VD->getLocation()))
return false;
if (Context.DeclMustBeEmitted(VD))
return false;
if (VD->isStaticDataMember() &&
VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return false;
if (VD->isStaticDataMember() &&
VD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&
VD->getMemberSpecializationInfo() && !VD->isOutOfLine())
return false;
if (VD->isInline() && !isMainFileLoc(*this, VD->getLocation()))
return false;
} else {
return false;
}
// Only warn for unused decls internal to the translation unit.
// FIXME: This seems like a bogus check; it suppresses -Wunused-function
// for inline functions defined in the main source file, for instance.
return mightHaveNonExternalLinkage(D);
}
void Sema::MarkUnusedFileScopedDecl(const DeclaratorDecl *D) {
if (!D)
return;
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
const FunctionDecl *First = FD->getFirstDecl();
if (FD != First && ShouldWarnIfUnusedFileScopedDecl(First))
return; // First should already be in the vector.
}
if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
const VarDecl *First = VD->getFirstDecl();
if (VD != First && ShouldWarnIfUnusedFileScopedDecl(First))
return; // First should already be in the vector.
}
if (ShouldWarnIfUnusedFileScopedDecl(D))
UnusedFileScopedDecls.push_back(D);
}
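/// Determine whether an unused-entity warning should be emitted for \p D.
/// For a decomposition declaration the individual bindings are what count;
/// for example (with an illustrative getPair()):
/// @code
/// auto [A, B] = getPair(); // warns only if neither 'A' nor 'B' is referenced
/// @endcode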
static bool ShouldDiagnoseUnusedDecl(const NamedDecl *D) {
if (D->isInvalidDecl())
return false;
if (auto *DD = dyn_cast<DecompositionDecl>(D)) {
// For a decomposition declaration, warn if none of the bindings are
// referenced, instead of if the variable itself is referenced (which
// it is, by the bindings' expressions).
for (auto *BD : DD->bindings())
if (BD->isReferenced())
return false;
} else if (!D->getDeclName()) {
return false;
} else if (D->isReferenced() || D->isUsed()) {
return false;
}
if (D->hasAttr<UnusedAttr>() || D->hasAttr<ObjCPreciseLifetimeAttr>())
return false;
if (isa<LabelDecl>(D))
return true;
// Except for labels, we only care about unused decls that are local to
// functions.
bool WithinFunction = D->getDeclContext()->isFunctionOrMethod();
if (const auto *R = dyn_cast<CXXRecordDecl>(D->getDeclContext()))
// For dependent types, the diagnostic is deferred.
WithinFunction =
WithinFunction || (R->isLocalClass() && !R->isDependentType());
if (!WithinFunction)
return false;
if (isa<TypedefNameDecl>(D))
return true;
// White-list anything that isn't a local variable.
if (!isa<VarDecl>(D) || isa<ParmVarDecl>(D) || isa<ImplicitParamDecl>(D))
return false;
// Types of valid local variables should be complete, so this should succeed.
if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
const Expr *Init = VD->getInit();
if (const auto *Cleanups = dyn_cast_or_null<ExprWithCleanups>(Init))
Init = Cleanups->getSubExpr();
const auto *Ty = VD->getType().getTypePtr();
// Only look at the outermost level of typedef.
if (const TypedefType *TT = Ty->getAs<TypedefType>()) {
// Allow anything marked with __attribute__((unused)).
if (TT->getDecl()->hasAttr<UnusedAttr>())
return false;
}
// Warn for reference variables whose initialization performs lifetime
// extension.
if (const auto *MTE = dyn_cast_or_null<MaterializeTemporaryExpr>(Init)) {
if (MTE->getExtendingDecl()) {
Ty = VD->getType().getNonReferenceType().getTypePtr();
Init = MTE->getSubExpr()->IgnoreImplicitAsWritten();
}
}
// If we failed to complete the type for some reason, or if the type is
// dependent, don't diagnose the variable.
if (Ty->isIncompleteType() || Ty->isDependentType())
return false;
// Look at the element type to ensure that the warning behaviour is
// consistent for both scalars and arrays.
Ty = Ty->getBaseElementTypeUnsafe();
if (const TagType *TT = Ty->getAs<TagType>()) {
const TagDecl *Tag = TT->getDecl();
if (Tag->hasAttr<UnusedAttr>())
return false;
if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Tag)) {
if (!RD->hasTrivialDestructor() && !RD->hasAttr<WarnUnusedAttr>())
return false;
if (Init) {
const CXXConstructExpr *Construct =
dyn_cast<CXXConstructExpr>(Init);
if (Construct && !Construct->isElidable()) {
CXXConstructorDecl *CD = Construct->getConstructor();
if (!CD->isTrivial() && !RD->hasAttr<WarnUnusedAttr>() &&
(VD->getInit()->isValueDependent() || !VD->evaluateValue()))
return false;
}
// Suppress the warning if we don't know how this is constructed and
// it could possibly have a non-trivial constructor.
if (Init->isTypeDependent()) {
for (const CXXConstructorDecl *Ctor : RD->ctors())
if (!Ctor->isTrivial())
return false;
}
// Suppress the warning if the constructor is unresolved because
// its arguments are dependent.
if (isa<CXXUnresolvedConstructExpr>(Init))
return false;
}
}
}
// TODO: __attribute__((unused)) templates?
}
return true;
}
static void GenerateFixForUnusedDecl(const NamedDecl *D, ASTContext &Ctx,
FixItHint &Hint) {
if (isa<LabelDecl>(D)) {
SourceLocation AfterColon = Lexer::findLocationAfterToken(
D->getEndLoc(), tok::colon, Ctx.getSourceManager(), Ctx.getLangOpts(),
true);
if (AfterColon.isInvalid())
return;
Hint = FixItHint::CreateRemoval(
CharSourceRange::getCharRange(D->getBeginLoc(), AfterColon));
}
}
void Sema::DiagnoseUnusedNestedTypedefs(const RecordDecl *D) {
DiagnoseUnusedNestedTypedefs(
D, [this](SourceLocation Loc, PartialDiagnostic PD) { Diag(Loc, PD); });
}
void Sema::DiagnoseUnusedNestedTypedefs(const RecordDecl *D,
DiagReceiverTy DiagReceiver) {
if (D->getTypeForDecl()->isDependentType())
return;
for (auto *TmpD : D->decls()) {
if (const auto *T = dyn_cast<TypedefNameDecl>(TmpD))
DiagnoseUnusedDecl(T, DiagReceiver);
else if (const auto *R = dyn_cast<RecordDecl>(TmpD))
DiagnoseUnusedNestedTypedefs(R, DiagReceiver);
}
}
void Sema::DiagnoseUnusedDecl(const NamedDecl *D) {
DiagnoseUnusedDecl(
D, [this](SourceLocation Loc, PartialDiagnostic PD) { Diag(Loc, PD); });
}
/// DiagnoseUnusedDecl - Emit warnings about declarations that are not used
/// unless they are marked attr(unused).
void Sema::DiagnoseUnusedDecl(const NamedDecl *D, DiagReceiverTy DiagReceiver) {
if (!ShouldDiagnoseUnusedDecl(D))
return;
if (auto *TD = dyn_cast<TypedefNameDecl>(D)) {
// typedefs can be referenced later on, so the diagnostics are emitted
// at end-of-translation-unit.
UnusedLocalTypedefNameCandidates.insert(TD);
return;
}
FixItHint Hint;
GenerateFixForUnusedDecl(D, Context, Hint);
unsigned DiagID;
if (isa<VarDecl>(D) && cast<VarDecl>(D)->isExceptionVariable())
DiagID = diag::warn_unused_exception_param;
else if (isa<LabelDecl>(D))
DiagID = diag::warn_unused_label;
else
DiagID = diag::warn_unused_variable;
DiagReceiver(D->getLocation(), PDiag(DiagID) << D << Hint);
}
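/// Diagnose a variable that is referenced only as the target of assignments,
/// e.g. (with an illustrative compute()):
/// @code
/// int N = 0;
/// N = compute(); // -Wunused-but-set-variable: the value of 'N' is never read
/// @endcode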
void Sema::DiagnoseUnusedButSetDecl(const VarDecl *VD,
DiagReceiverTy DiagReceiver) {
// If it's not referenced, it can't be set. If it has the Cleanup attribute,
// it's not really unused.
if (!VD->isReferenced() || !VD->getDeclName() || VD->hasAttr<UnusedAttr>() ||
VD->hasAttr<CleanupAttr>())
return;
const auto *Ty = VD->getType().getTypePtr()->getBaseElementTypeUnsafe();
if (Ty->isReferenceType() || Ty->isDependentType())
return;
if (const TagType *TT = Ty->getAs<TagType>()) {
const TagDecl *Tag = TT->getDecl();
if (Tag->hasAttr<UnusedAttr>())
return;
// In C++, don't warn for record types that don't have WarnUnusedAttr, to
// mimic gcc's behavior.
if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Tag)) {
if (!RD->hasAttr<WarnUnusedAttr>())
return;
}
}
// Don't warn about __block Objective-C pointer variables, as they might
// be assigned in the block but not used elsewhere for the purpose of lifetime
// extension.
if (VD->hasAttr<BlocksAttr>() && Ty->isObjCObjectPointerType())
return;
// Don't warn about Objective-C pointer variables with precise lifetime
// semantics; they can be used to ensure ARC releases the object at a known
// time, which may mean assignment but no other references.
if (VD->hasAttr<ObjCPreciseLifetimeAttr>() && Ty->isObjCObjectPointerType())
return;
auto iter = RefsMinusAssignments.find(VD);
if (iter == RefsMinusAssignments.end())
return;
assert(iter->getSecond() >= 0 &&
"Found a negative number of references to a VarDecl");
if (iter->getSecond() != 0)
return;
unsigned DiagID = isa<ParmVarDecl>(VD) ? diag::warn_unused_but_set_parameter
: diag::warn_unused_but_set_variable;
DiagReceiver(VD->getLocation(), PDiag(DiagID) << VD);
}
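// Illustrative sketch (hypothetical input): a variable that is assigned but
// never otherwise read, which this routine flags under
// -Wunused-but-set-variable:
//
//   void f(void) {
//     int x;
//     x = 1;   // 'x' is set here but its value is never used
//   }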
static void CheckPoppedLabel(LabelDecl *L, Sema &S,
Sema::DiagReceiverTy DiagReceiver) {
// Verify that we have no forward references left. If we do, there was a
// goto or an address-of-label taken, but no definition of the label. Forward
// references are indicated by a null sub-statement, or, for MS inline
// assembly labels, by the label being unresolved.
bool Diagnose = false;
if (L->isMSAsmLabel())
Diagnose = !L->isResolvedMSAsmLabel();
else
Diagnose = L->getStmt() == nullptr;
if (Diagnose)
DiagReceiver(L->getLocation(), S.PDiag(diag::err_undeclared_label_use)
<< L);
}
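// Illustrative sketch (hypothetical input): a goto whose label is never
// defined in the function, which CheckPoppedLabel diagnoses when the scope
// is popped:
//
//   void f(void) {
//     goto end;   // error: use of undeclared label 'end'
//   }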
void Sema::ActOnPopScope(SourceLocation Loc, Scope *S) {
S->applyNRVO();
if (S->decl_empty()) return;
assert((S->getFlags() & (Scope::DeclScope | Scope::TemplateParamScope)) &&
"Scope shouldn't contain decls!");
// We visit the decls in non-deterministic order, but we want diagnostics
// emitted in deterministic order. Collect any diagnostics that may be
// emitted, and sort them before emitting once we have visited all decls.
struct LocAndDiag {
SourceLocation Loc;
std::optional<SourceLocation> PreviousDeclLoc;
PartialDiagnostic PD;
};
SmallVector<LocAndDiag, 16> DeclDiags;
auto addDiag = [&DeclDiags](SourceLocation Loc, PartialDiagnostic PD) {
DeclDiags.push_back(LocAndDiag{Loc, std::nullopt, std::move(PD)});
};
auto addDiagWithPrev = [&DeclDiags](SourceLocation Loc,
SourceLocation PreviousDeclLoc,
PartialDiagnostic PD) {
DeclDiags.push_back(LocAndDiag{Loc, PreviousDeclLoc, std::move(PD)});
};
for (auto *TmpD : S->decls()) {
assert(TmpD && "This decl didn't get pushed??");
assert(isa<NamedDecl>(TmpD) && "Decl isn't NamedDecl?");
NamedDecl *D = cast<NamedDecl>(TmpD);
// Diagnose unused variables in this scope.
if (!S->hasUnrecoverableErrorOccurred()) {
DiagnoseUnusedDecl(D, addDiag);
if (const auto *RD = dyn_cast<RecordDecl>(D))
DiagnoseUnusedNestedTypedefs(RD, addDiag);
if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
DiagnoseUnusedButSetDecl(VD, addDiag);
RefsMinusAssignments.erase(VD);
}
}
if (!D->getDeclName()) continue;
// If this was a forward reference to a label, verify it was defined.
if (LabelDecl *LD = dyn_cast<LabelDecl>(D))
CheckPoppedLabel(LD, *this, addDiag);
// Remove this name from our lexical scope, and warn on it if we haven't
// already.
IdResolver.RemoveDecl(D);
auto ShadowI = ShadowingDecls.find(D);
if (ShadowI != ShadowingDecls.end()) {
if (const auto *FD = dyn_cast<FieldDecl>(ShadowI->second)) {
addDiagWithPrev(D->getLocation(), FD->getLocation(),
PDiag(diag::warn_ctor_parm_shadows_field)
<< D << FD << FD->getParent());
}
ShadowingDecls.erase(ShadowI);
}
}
llvm::sort(DeclDiags,
[](const LocAndDiag &LHS, const LocAndDiag &RHS) -> bool {
// The particular order for diagnostics is not important, as long
// as the order is deterministic. Using the raw location is going
// to generally be in source order unless there are macro
// expansions involved.
return LHS.Loc.getRawEncoding() < RHS.Loc.getRawEncoding();
});
for (const LocAndDiag &D : DeclDiags) {
Diag(D.Loc, D.PD);
if (D.PreviousDeclLoc)
Diag(*D.PreviousDeclLoc, diag::note_previous_declaration);
}
}
/// Look for an Objective-C class in the translation unit.
///
/// \param Id The name of the Objective-C class we're looking for. If
/// typo-correction fixes this name, the Id will be updated
/// to the fixed name.
///
/// \param IdLoc The location of the name in the translation unit.
///
/// \param DoTypoCorrection If true, this routine will attempt typo correction
/// if there is no class with the given name.
///
/// \returns The declaration of the named Objective-C class, or NULL if the
/// class could not be found.
ObjCInterfaceDecl *Sema::getObjCInterfaceDecl(IdentifierInfo *&Id,
SourceLocation IdLoc,
bool DoTypoCorrection) {
// The third "scope" argument is 0 since we aren't enabling lazy built-in
// creation from this context.
NamedDecl *IDecl = LookupSingleName(TUScope, Id, IdLoc, LookupOrdinaryName);
if (!IDecl && DoTypoCorrection) {
// Perform typo correction at the given location, but only if we
// find an Objective-C class name.
DeclFilterCCC<ObjCInterfaceDecl> CCC{};
if (TypoCorrection C =
CorrectTypo(DeclarationNameInfo(Id, IdLoc), LookupOrdinaryName,
TUScope, nullptr, CCC, CTK_ErrorRecovery)) {
diagnoseTypo(C, PDiag(diag::err_undef_interface_suggest) << Id);
IDecl = C.getCorrectionDeclAs<ObjCInterfaceDecl>();
Id = IDecl->getIdentifier();
}
}
ObjCInterfaceDecl *Def = dyn_cast_or_null<ObjCInterfaceDecl>(IDecl);
// This routine must always return a class definition, if any.
if (Def && Def->getDefinition())
Def = Def->getDefinition();
return Def;
}
/// getNonFieldDeclScope - Retrieves the innermost scope, starting
/// from S, where a non-field would be declared. This routine copes
/// with the difference between C and C++ scoping rules in structs and
/// unions. For example, the following code is well-formed in C but
/// ill-formed in C++:
/// @code
/// struct S6 {
/// enum { BAR } e;
/// };
///
/// void test_S6() {
/// struct S6 a;
/// a.e = BAR;
/// }
/// @endcode
/// For the declaration of BAR, this routine will return a different
/// scope. The scope S will be the scope of the unnamed enumeration
/// within S6. In C++, this routine will return the scope associated
/// with S6, because the enumeration's scope is a transparent
/// context but structures can contain non-field names. In C, this
/// routine will return the translation unit scope, since the
/// enumeration's scope is a transparent context and structures cannot
/// contain non-field names.
Scope *Sema::getNonFieldDeclScope(Scope *S) {
while (((S->getFlags() & Scope::DeclScope) == 0) ||
(S->getEntity() && S->getEntity()->isTransparentContext()) ||
(S->isClassScope() && !getLangOpts().CPlusPlus))
S = S->getParent();
return S;
}
static StringRef getHeaderName(Builtin::Context &BuiltinInfo, unsigned ID,
ASTContext::GetBuiltinTypeError Error) {
switch (Error) {
case ASTContext::GE_None:
return "";
case ASTContext::GE_Missing_type:
return BuiltinInfo.getHeaderName(ID);
case ASTContext::GE_Missing_stdio:
return "stdio.h";
case ASTContext::GE_Missing_setjmp:
return "setjmp.h";
case ASTContext::GE_Missing_ucontext:
return "ucontext.h";
}
llvm_unreachable("unhandled error kind");
}
FunctionDecl *Sema::CreateBuiltin(IdentifierInfo *II, QualType Type,
unsigned ID, SourceLocation Loc) {
DeclContext *Parent = Context.getTranslationUnitDecl();
if (getLangOpts().CPlusPlus) {
LinkageSpecDecl *CLinkageDecl = LinkageSpecDecl::Create(
Context, Parent, Loc, Loc, LinkageSpecDecl::lang_c, false);
CLinkageDecl->setImplicit();
Parent->addDecl(CLinkageDecl);
Parent = CLinkageDecl;
}
FunctionDecl *New = FunctionDecl::Create(Context, Parent, Loc, Loc, II, Type,
/*TInfo=*/nullptr, SC_Extern,
getCurFPFeatures().isFPConstrained(),
false, Type->isFunctionProtoType());
New->setImplicit();
New->addAttr(BuiltinAttr::CreateImplicit(Context, ID));
// Create Decl objects for each parameter, adding them to the
// FunctionDecl.
if (const FunctionProtoType *FT = dyn_cast<FunctionProtoType>(Type)) {
SmallVector<ParmVarDecl *, 16> Params;
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
ParmVarDecl *parm = ParmVarDecl::Create(
Context, New, SourceLocation(), SourceLocation(), nullptr,
FT->getParamType(i), /*TInfo=*/nullptr, SC_None, nullptr);
parm->setScopeInfo(0, i);
Params.push_back(parm);
}
New->setParams(Params);
}
AddKnownFunctionAttributes(New);
return New;
}
/// LazilyCreateBuiltin - The specified Builtin-ID was first used at
/// file scope, so lazily create a decl for it. ForRedeclaration is true
/// if we're creating this built-in in anticipation of redeclaring the
/// built-in.
NamedDecl *Sema::LazilyCreateBuiltin(IdentifierInfo *II, unsigned ID,
Scope *S, bool ForRedeclaration,
SourceLocation Loc) {
LookupNecessaryTypesForBuiltin(S, ID);
ASTContext::GetBuiltinTypeError Error;
QualType R = Context.GetBuiltinType(ID, Error);
if (Error) {
if (!ForRedeclaration)
return nullptr;
// If the builtin has no associated type, or is allowed to have a
// mismatched type, don't warn that we could not find a type for it.
if (Error == ASTContext::GE_Missing_type ||
Context.BuiltinInfo.allowTypeMismatch(ID))
return nullptr;
// If we could not find a type for setjmp it is because the jmp_buf type was
// not defined prior to the setjmp declaration.
if (Error == ASTContext::GE_Missing_setjmp) {
Diag(Loc, diag::warn_implicit_decl_no_jmp_buf)
<< Context.BuiltinInfo.getName(ID);
return nullptr;
}
// Generally, we emit a warning that the declaration requires the
// appropriate header.
Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
<< getHeaderName(Context.BuiltinInfo, ID, Error)
<< Context.BuiltinInfo.getName(ID);
return nullptr;
}
if (!ForRedeclaration &&
(Context.BuiltinInfo.isPredefinedLibFunction(ID) ||
Context.BuiltinInfo.isHeaderDependentFunction(ID))) {
Diag(Loc, LangOpts.C99 ? diag::ext_implicit_lib_function_decl_c99
: diag::ext_implicit_lib_function_decl)
<< Context.BuiltinInfo.getName(ID) << R;
if (const char *Header = Context.BuiltinInfo.getHeaderName(ID))
Diag(Loc, diag::note_include_header_or_declare)
<< Header << Context.BuiltinInfo.getName(ID);
}
if (R.isNull())
return nullptr;
FunctionDecl *New = CreateBuiltin(II, R, ID, Loc);
RegisterLocallyScopedExternCDecl(New, S);
// TUScope is the translation-unit scope to insert this function into.
// FIXME: This is hideous. We need to teach PushOnScopeChains to
// relate Scopes to DeclContexts, and probably eliminate CurContext
// entirely, but we're not there yet.
DeclContext *SavedContext = CurContext;
CurContext = New->getDeclContext();
PushOnScopeChains(New, TUScope);
CurContext = SavedContext;
return New;
}
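// Illustrative sketch (hypothetical input): in C, calling a library builtin
// without including its header triggers the implicit-declaration diagnostics
// emitted above, along with a note suggesting the header:
//
//   // no #include <stdio.h>
//   void f(void) {
//     printf("hi\n");   // warning: implicitly declaring library function
//   }                   // note: include the header <stdio.h> or explicitly
//                       // provide a declaration for 'printf'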
/// Typedef declarations don't have linkage, but they still denote the same
/// entity if their types are the same.
/// FIXME: This is notionally doing the same thing as ASTReaderDecl's
/// isSameEntity.
static void filterNonConflictingPreviousTypedefDecls(Sema &S,
TypedefNameDecl *Decl,
LookupResult &Previous) {
// This is only interesting when modules are enabled.
if (!S.getLangOpts().Modules && !S.getLangOpts().ModulesLocalVisibility)
return;
// Empty sets are uninteresting.
if (Previous.empty())
return;
LookupResult::Filter Filter = Previous.makeFilter();
while (Filter.hasNext()) {
NamedDecl *Old = Filter.next();
// Non-hidden declarations are never ignored.
if (S.isVisible(Old))
continue;
// Declarations of the same entity are not ignored, even if they have
// different linkages.
if (auto *OldTD = dyn_cast<TypedefNameDecl>(Old)) {
if (S.Context.hasSameType(OldTD->getUnderlyingType(),
Decl->getUnderlyingType()))
continue;
// If both declarations give a tag declaration a typedef name for linkage
// purposes, then they declare the same entity.
if (OldTD->getAnonDeclWithTypedefName(/*AnyRedecl*/true) &&
Decl->getAnonDeclWithTypedefName())
continue;
}
Filter.erase();
}
Filter.done();
}
bool Sema::isIncompatibleTypedef(TypeDecl *Old, TypedefNameDecl *New) {
QualType OldType;
if (TypedefNameDecl *OldTypedef = dyn_cast<TypedefNameDecl>(Old))
OldType = OldTypedef->getUnderlyingType();
else
OldType = Context.getTypeDeclType(Old);
QualType NewType = New->getUnderlyingType();
if (NewType->isVariablyModifiedType()) {
// Must not redefine a typedef with a variably-modified type.
int Kind = isa<TypeAliasDecl>(Old) ? 1 : 0;
Diag(New->getLocation(), diag::err_redefinition_variably_modified_typedef)
<< Kind << NewType;
if (Old->getLocation().isValid())
notePreviousDefinition(Old, New->getLocation());
New->setInvalidDecl();
return true;
}
if (OldType != NewType &&
!OldType->isDependentType() &&
!NewType->isDependentType() &&
!Context.hasSameType(OldType, NewType)) {
int Kind = isa<TypeAliasDecl>(Old) ? 1 : 0;
Diag(New->getLocation(), diag::err_redefinition_different_typedef)
<< Kind << NewType << OldType;
if (Old->getLocation().isValid())
notePreviousDefinition(Old, New->getLocation());
New->setInvalidDecl();
return true;
}
return false;
}
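// Illustrative sketch (hypothetical input): a typedef redefinition with a
// different underlying type, which this routine rejects:
//
//   typedef int T;
//   typedef long T;   // error: typedef redefinition with different types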
/// MergeTypedefNameDecl - We just parsed a typedef 'New' which has the
/// same name and scope as a previous declaration 'Old'. Figure out
/// how to resolve this situation, merging decls or emitting
/// diagnostics as appropriate. If there was an error, set New to be invalid.
///
void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
LookupResult &OldDecls) {
// If the new decl is known invalid already, don't bother doing any
// merging checks.
if (New->isInvalidDecl()) return;
// Allow multiple definitions for ObjC built-in typedefs.
// FIXME: Verify the underlying types are equivalent!
if (getLangOpts().ObjC) {
const IdentifierInfo *TypeID = New->getIdentifier();
switch (TypeID->getLength()) {
default: break;
case 2:
{
if (!TypeID->isStr("id"))
break;
QualType T = New->getUnderlyingType();
if (!T->isPointerType())
break;
if (!T->isVoidPointerType()) {
QualType PT = T->castAs<PointerType>()->getPointeeType();
if (!PT->isStructureType())
break;
}
Context.setObjCIdRedefinitionType(T);
// Install the built-in type for 'id', ignoring the current definition.
New->setTypeForDecl(Context.getObjCIdType().getTypePtr());
return;
}
case 5:
if (!TypeID->isStr("Class"))
break;
Context.setObjCClassRedefinitionType(New->getUnderlyingType());
// Install the built-in type for 'Class', ignoring the current definition.
New->setTypeForDecl(Context.getObjCClassType().getTypePtr());
return;
case 3:
if (!TypeID->isStr("SEL"))
break;
Context.setObjCSelRedefinitionType(New->getUnderlyingType());
// Install the built-in type for 'SEL', ignoring the current definition.
New->setTypeForDecl(Context.getObjCSelType().getTypePtr());
return;
}
// Fall through - the typedef name was not a builtin type.
}
// Verify the old decl was also a type.
TypeDecl *Old = OldDecls.getAsSingle<TypeDecl>();
if (!Old) {
Diag(New->getLocation(), diag::err_redefinition_different_kind)
<< New->getDeclName();
NamedDecl *OldD = OldDecls.getRepresentativeDecl();
if (OldD->getLocation().isValid())
notePreviousDefinition(OldD, New->getLocation());
return New->setInvalidDecl();
}
// If the old declaration is invalid, just give up here.
if (Old->isInvalidDecl())
return New->setInvalidDecl();
if (auto *OldTD = dyn_cast<TypedefNameDecl>(Old)) {
auto *OldTag = OldTD->getAnonDeclWithTypedefName(/*AnyRedecl*/true);
auto *NewTag = New->getAnonDeclWithTypedefName();
NamedDecl *Hidden = nullptr;
if (OldTag && NewTag &&
OldTag->getCanonicalDecl() != NewTag->getCanonicalDecl() &&
!hasVisibleDefinition(OldTag, &Hidden)) {
// There is a definition of this tag, but it is not visible. Use it
// instead of our tag.
New->setTypeForDecl(OldTD->getTypeForDecl());
if (OldTD->isModed())
New->setModedTypeSourceInfo(OldTD->getTypeSourceInfo(),
OldTD->getUnderlyingType());
else
New->setTypeSourceInfo(OldTD->getTypeSourceInfo());
// Make the old tag definition visible.
makeMergedDefinitionVisible(Hidden);
// If this was an unscoped enumeration, yank all of its enumerators
// out of the scope.
if (isa<EnumDecl>(NewTag)) {
Scope *EnumScope = getNonFieldDeclScope(S);
for (auto *D : NewTag->decls()) {
auto *ED = cast<EnumConstantDecl>(D);
assert(EnumScope->isDeclScope(ED));
EnumScope->RemoveDecl(ED);
IdResolver.RemoveDecl(ED);
ED->getLexicalDeclContext()->removeDecl(ED);
}
}
}
}
// If the typedef types are not identical, reject them in all languages and
// with any extensions enabled.
if (isIncompatibleTypedef(Old, New))
return;
// The types match. Link up the redeclaration chain and merge attributes if
// the old declaration was a typedef.
if (TypedefNameDecl *Typedef = dyn_cast<TypedefNameDecl>(Old)) {
New->setPreviousDecl(Typedef);
mergeDeclAttributes(New, Old);
}
if (getLangOpts().MicrosoftExt)
return;
if (getLangOpts().CPlusPlus) {
// C++ [dcl.typedef]p2:
// In a given non-class scope, a typedef specifier can be used to
// redefine the name of any type declared in that scope to refer
// to the type to which it already refers.
if (!isa<CXXRecordDecl>(CurContext))
return;
// C++0x [dcl.typedef]p4:
// In a given class scope, a typedef specifier can be used to redefine
// any class-name declared in that scope that is not also a typedef-name
// to refer to the type to which it already refers.
//
// This wording came in via DR424, which was a correction to the
// wording in DR56, which accidentally banned code like:
//
// struct S {
// typedef struct A { } A;
// };
//
// in the C++03 standard. We implement the C++0x semantics, which
// allow the above but disallow
//
// struct S {
// typedef int I;
// typedef int I;
// };
//
// since that was the intent of DR56.
if (!isa<TypedefNameDecl>(Old))
return;
Diag(New->getLocation(), diag::err_redefinition)
<< New->getDeclName();
notePreviousDefinition(Old, New->getLocation());
return New->setInvalidDecl();
}
// Modules always permit redefinition of typedefs, as does C11.
if (getLangOpts().Modules || getLangOpts().C11)
return;
// If we have a redefinition of a typedef in C, emit a warning. This warning
// is normally mapped to an error, but can be controlled with
// -Wtypedef-redefinition. If either the original or the redefinition is
// in a system header, don't emit this for compatibility with GCC.
if (getDiagnostics().getSuppressSystemWarnings() &&
// Some standard types are defined implicitly in Clang (e.g. OpenCL).
(Old->isImplicit() ||
Context.getSourceManager().isInSystemHeader(Old->getLocation()) ||
Context.getSourceManager().isInSystemHeader(New->getLocation())))
return;
Diag(New->getLocation(), diag::ext_redefinition_of_typedef)
<< New->getDeclName();
notePreviousDefinition(Old, New->getLocation());
}
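// Illustrative sketch (hypothetical input): in C99 (not C11, and outside a
// system header), an identical typedef redefinition reaches the final
// diagnostic above:
//
//   typedef int T;
//   typedef int T;   // -Wtypedef-redefinition: redefinition of typedef 'T'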
/// DeclHasAttr - Returns true if the declaration D already has the target
/// attribute.
static bool DeclHasAttr(const Decl *D, const Attr *A) {
const OwnershipAttr *OA = dyn_cast<OwnershipAttr>(A);
const AnnotateAttr *Ann = dyn_cast<AnnotateAttr>(A);
for (const auto *i : D->attrs())
if (i->getKind() == A->getKind()) {
if (Ann) {
if (Ann->getAnnotation() == cast<AnnotateAttr>(i)->getAnnotation())
return true;
continue;
}
// FIXME: Don't hardcode this check
if (OA && isa<OwnershipAttr>(i))
return OA->getOwnKind() == cast<OwnershipAttr>(i)->getOwnKind();
return true;
}
return false;
}
static bool isAttributeTargetADefinition(Decl *D) {
if (VarDecl *VD = dyn_cast<VarDecl>(D))
return VD->isThisDeclarationADefinition();
if (TagDecl *TD = dyn_cast<TagDecl>(D))
return TD->isCompleteDefinition() || TD->isBeingDefined();
return true;
}
/// Merge alignment attributes from \p Old to \p New, taking into account the
/// special semantics of C11's _Alignas specifier and C++11's alignas attribute.
///
/// \return \c true if any attributes were added to \p New.
static bool mergeAlignedAttrs(Sema &S, NamedDecl *New, Decl *Old) {
// Look for alignas attributes on Old, and pick out whichever attribute
// specifies the strictest alignment requirement.
AlignedAttr *OldAlignasAttr = nullptr;
AlignedAttr *OldStrictestAlignAttr = nullptr;
unsigned OldAlign = 0;
for (auto *I : Old->specific_attrs<AlignedAttr>()) {
// FIXME: We have no way of representing inherited dependent alignments
// in a case like:
// template<int A, int B> struct alignas(A) X;
// template<int A, int B> struct alignas(B) X {};
// For now, we just ignore any alignas attributes which are not on the
// definition in such a case.
if (I->isAlignmentDependent())
return false;
if (I->isAlignas())
OldAlignasAttr = I;
unsigned Align = I->getAlignment(S.Context);
if (Align > OldAlign) {
OldAlign = Align;
OldStrictestAlignAttr = I;
}
}
// Look for alignas attributes on New.
AlignedAttr *NewAlignasAttr = nullptr;
unsigned NewAlign = 0;
for (auto *I : New->specific_attrs<AlignedAttr>()) {
if (I->isAlignmentDependent())
return false;
if (I->isAlignas())
NewAlignasAttr = I;
unsigned Align = I->getAlignment(S.Context);
if (Align > NewAlign)
NewAlign = Align;
}
if (OldAlignasAttr && NewAlignasAttr && OldAlign != NewAlign) {
// Both declarations have 'alignas' attributes. We require them to match.
// C++11 [dcl.align]p6 and C11 6.7.5/7 both come close to saying this, but
// fall short. (If two declarations both have alignas, they must both match
// every definition, and so must match each other if there is a definition.)
// If either declaration only contains 'alignas(0)' specifiers, then it
// specifies the natural alignment for the type.
if (OldAlign == 0 || NewAlign == 0) {
QualType Ty;
if (ValueDecl *VD = dyn_cast<ValueDecl>(New))
Ty = VD->getType();
else
Ty = S.Context.getTagDeclType(cast<TagDecl>(New));
if (OldAlign == 0)
OldAlign = S.Context.getTypeAlign(Ty);
if (NewAlign == 0)
NewAlign = S.Context.getTypeAlign(Ty);
}
if (OldAlign != NewAlign) {
S.Diag(NewAlignasAttr->getLocation(), diag::err_alignas_mismatch)
<< (unsigned)S.Context.toCharUnitsFromBits(OldAlign).getQuantity()
<< (unsigned)S.Context.toCharUnitsFromBits(NewAlign).getQuantity();
S.Diag(OldAlignasAttr->getLocation(), diag::note_previous_declaration);
}
}
if (OldAlignasAttr && !NewAlignasAttr && isAttributeTargetADefinition(New)) {
// C++11 [dcl.align]p6:
// if any declaration of an entity has an alignment-specifier,
// every defining declaration of that entity shall specify an
// equivalent alignment.
// C11 6.7.5/7:
// If the definition of an object does not have an alignment
// specifier, any other declaration of that object shall also
// have no alignment specifier.
S.Diag(New->getLocation(), diag::err_alignas_missing_on_definition)
<< OldAlignasAttr;
S.Diag(OldAlignasAttr->getLocation(), diag::note_alignas_on_declaration)
<< OldAlignasAttr;
}
bool AnyAdded = false;
// Ensure we have an attribute representing the strictest alignment.
if (OldAlign > NewAlign) {
AlignedAttr *Clone = OldStrictestAlignAttr->clone(S.Context);
Clone->setInherited(true);
New->addAttr(Clone);
AnyAdded = true;
}
// Ensure we have an alignas attribute if the old declaration had one.
if (OldAlignasAttr && !NewAlignasAttr &&
!(AnyAdded && OldStrictestAlignAttr->isAlignas())) {
AlignedAttr *Clone = OldAlignasAttr->clone(S.Context);
Clone->setInherited(true);
New->addAttr(Clone);
AnyAdded = true;
}
return AnyAdded;
}
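// Illustrative sketch (hypothetical input): mismatched alignas specifiers
// across declarations of the same entity, which this routine diagnoses:
//
//   struct alignas(8) S;
//   struct alignas(4) S {};   // error: the two alignas requirements differ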
#define WANT_DECL_MERGE_LOGIC
#include "clang/Sema/AttrParsedAttrImpl.inc"
#undef WANT_DECL_MERGE_LOGIC
static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
const InheritableAttr *Attr,
Sema::AvailabilityMergeKind AMK) {
// Diagnose any mutual exclusions between the attribute that we want to add
// and attributes that already exist on the declaration.
if (!DiagnoseMutualExclusions(S, D, Attr))
return false;
// This function copies an attribute Attr from a previous declaration to the
// new declaration D if the new declaration doesn't itself have that attribute
// yet or if that attribute allows duplicates.
// If you're adding a new attribute that requires logic different from
// "use explicit attribute on decl if present, else use attribute from
// previous decl", for example if the attribute needs to be consistent
// between redeclarations, you need to call a custom merge function here.
InheritableAttr *NewAttr = nullptr;
if (const auto *AA = dyn_cast<AvailabilityAttr>(Attr))
NewAttr = S.mergeAvailabilityAttr(
D, *AA, AA->getPlatform(), AA->isImplicit(), AA->getIntroduced(),
AA->getDeprecated(), AA->getObsoleted(), AA->getUnavailable(),
AA->getMessage(), AA->getStrict(), AA->getReplacement(), AMK,
AA->getPriority());
else if (const auto *VA = dyn_cast<VisibilityAttr>(Attr))
NewAttr = S.mergeVisibilityAttr(D, *VA, VA->getVisibility());
else if (const auto *VA = dyn_cast<TypeVisibilityAttr>(Attr))
NewAttr = S.mergeTypeVisibilityAttr(D, *VA, VA->getVisibility());
else if (const auto *ImportA = dyn_cast<DLLImportAttr>(Attr))
NewAttr = S.mergeDLLImportAttr(D, *ImportA);
else if (const auto *ExportA = dyn_cast<DLLExportAttr>(Attr))
NewAttr = S.mergeDLLExportAttr(D, *ExportA);
else if (const auto *EA = dyn_cast<ErrorAttr>(Attr))
NewAttr = S.mergeErrorAttr(D, *EA, EA->getUserDiagnostic());
else if (const auto *FA = dyn_cast<FormatAttr>(Attr))
NewAttr = S.mergeFormatAttr(D, *FA, FA->getType(), FA->getFormatIdx(),
FA->getFirstArg());
else if (const auto *SA = dyn_cast<SectionAttr>(Attr))
NewAttr = S.mergeSectionAttr(D, *SA, SA->getName());
else if (const auto *CSA = dyn_cast<CodeSegAttr>(Attr))
NewAttr = S.mergeCodeSegAttr(D, *CSA, CSA->getName());
else if (const auto *IA = dyn_cast<MSInheritanceAttr>(Attr))
NewAttr = S.mergeMSInheritanceAttr(D, *IA, IA->getBestCase(),
IA->getInheritanceModel());
else if (const auto *AA = dyn_cast<AlwaysInlineAttr>(Attr))
NewAttr = S.mergeAlwaysInlineAttr(D, *AA,
&S.Context.Idents.get(AA->getSpelling()));
else if (S.getLangOpts().CUDA && isa<FunctionDecl>(D) &&
(isa<CUDAHostAttr>(Attr) || isa<CUDADeviceAttr>(Attr) ||
isa<CUDAGlobalAttr>(Attr))) {
// CUDA target attributes are part of function signature for
// overloading purposes and must not be merged.
return false;
} else if (const auto *MA = dyn_cast<MinSizeAttr>(Attr))
NewAttr = S.mergeMinSizeAttr(D, *MA);
else if (const auto *SNA = dyn_cast<SwiftNameAttr>(Attr))
NewAttr = S.mergeSwiftNameAttr(D, *SNA, SNA->getName());
else if (const auto *OA = dyn_cast<OptimizeNoneAttr>(Attr))
NewAttr = S.mergeOptimizeNoneAttr(D, *OA);
else if (const auto *InternalLinkageA = dyn_cast<InternalLinkageAttr>(Attr))
NewAttr = S.mergeInternalLinkageAttr(D, *InternalLinkageA);
else if (isa<AlignedAttr>(Attr))
// AlignedAttrs are handled separately, because we need to handle all
// such attributes on a declaration at the same time.
NewAttr = nullptr;
else if ((isa<DeprecatedAttr>(Attr) || isa<UnavailableAttr>(Attr)) &&
(AMK == Sema::AMK_Override ||
AMK == Sema::AMK_ProtocolImplementation ||
AMK == Sema::AMK_OptionalProtocolImplementation))
NewAttr = nullptr;
else if (const auto *UA = dyn_cast<UuidAttr>(Attr))
NewAttr = S.mergeUuidAttr(D, *UA, UA->getGuid(), UA->getGuidDecl());
else if (const auto *IMA = dyn_cast<WebAssemblyImportModuleAttr>(Attr))
NewAttr = S.mergeImportModuleAttr(D, *IMA);
else if (const auto *INA = dyn_cast<WebAssemblyImportNameAttr>(Attr))
NewAttr = S.mergeImportNameAttr(D, *INA);
else if (const auto *TCBA = dyn_cast<EnforceTCBAttr>(Attr))
NewAttr = S.mergeEnforceTCBAttr(D, *TCBA);
else if (const auto *TCBLA = dyn_cast<EnforceTCBLeafAttr>(Attr))
NewAttr = S.mergeEnforceTCBLeafAttr(D, *TCBLA);
else if (const auto *BTFA = dyn_cast<BTFDeclTagAttr>(Attr))
NewAttr = S.mergeBTFDeclTagAttr(D, *BTFA);
else if (const auto *NT = dyn_cast<HLSLNumThreadsAttr>(Attr))
NewAttr =
S.mergeHLSLNumThreadsAttr(D, *NT, NT->getX(), NT->getY(), NT->getZ());
else if (const auto *SA = dyn_cast<HLSLShaderAttr>(Attr))
NewAttr = S.mergeHLSLShaderAttr(D, *SA, SA->getType());
else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr))
NewAttr = cast<InheritableAttr>(Attr->clone(S.Context));
if (NewAttr) {
NewAttr->setInherited(true);
D->addAttr(NewAttr);
if (isa<MSInheritanceAttr>(NewAttr))
S.Consumer.AssignInheritanceModel(cast<CXXRecordDecl>(D));
return true;
}
return false;
}
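// Illustrative sketch (hypothetical input): inheritable attributes from a
// previous declaration are copied onto a redeclaration by the logic above:
//
//   void f(void) __attribute__((deprecated));
//   void f(void);      // inherits 'deprecated'; uses through either
//                      // declaration warn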
static const NamedDecl *getDefinition(const Decl *D) {
if (const TagDecl *TD = dyn_cast<TagDecl>(D))
return TD->getDefinition();
if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
const VarDecl *Def = VD->getDefinition();
if (Def)
return Def;
return VD->getActingDefinition();
}
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
const FunctionDecl *Def = nullptr;
if (FD->isDefined(Def, true))
return Def;
}
return nullptr;
}
static bool hasAttribute(const Decl *D, attr::Kind Kind) {
for (const auto *Attribute : D->attrs())
if (Attribute->getKind() == Kind)
return true;
return false;
}
/// checkNewAttributesAfterDef - If we already have a definition, check that
/// there are no new attributes in this declaration.
static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
if (!New->hasAttrs())
return;
const NamedDecl *Def = getDefinition(Old);
if (!Def || Def == New)
return;
AttrVec &NewAttributes = New->getAttrs();
for (unsigned I = 0, E = NewAttributes.size(); I != E;) {
const Attr *NewAttribute = NewAttributes[I];
if (isa<AliasAttr>(NewAttribute) || isa<IFuncAttr>(NewAttribute)) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(New)) {
Sema::SkipBodyInfo SkipBody;
S.CheckForFunctionRedefinition(FD, cast<FunctionDecl>(Def), &SkipBody);
// If we're skipping this definition, drop the "alias" attribute.
if (SkipBody.ShouldSkip) {
NewAttributes.erase(NewAttributes.begin() + I);
--E;
continue;
}
} else {
VarDecl *VD = cast<VarDecl>(New);
unsigned Diag = cast<VarDecl>(Def)->isThisDeclarationADefinition() ==
VarDecl::TentativeDefinition
? diag::err_alias_after_tentative
: diag::err_redefinition;
S.Diag(VD->getLocation(), Diag) << VD->getDeclName();
if (Diag == diag::err_redefinition)
S.notePreviousDefinition(Def, VD->getLocation());
else
S.Diag(Def->getLocation(), diag::note_previous_definition);
VD->setInvalidDecl();
}
++I;
continue;
}
if (const VarDecl *VD = dyn_cast<VarDecl>(Def)) {
// Tentative definitions are only interesting for the alias check above.
if (VD->isThisDeclarationADefinition() != VarDecl::Definition) {
++I;
continue;
}
}
if (hasAttribute(Def, NewAttribute->getKind())) {
++I;
continue; // regular attr merging will take care of validating this.
}
if (isa<C11NoReturnAttr>(NewAttribute)) {
// C's _Noreturn is allowed to be added to a function after it is defined.
++I;
continue;
} else if (isa<UuidAttr>(NewAttribute)) {
// MSVC will allow a subsequent definition to add a uuid to a class.
++I;
continue;
} else if (const AlignedAttr *AA = dyn_cast<AlignedAttr>(NewAttribute)) {
if (AA->isAlignas()) {
// C++11 [dcl.align]p6:
// if any declaration of an entity has an alignment-specifier,
// every defining declaration of that entity shall specify an
// equivalent alignment.
// C11 6.7.5/7:
// If the definition of an object does not have an alignment
// specifier, any other declaration of that object shall also
// have no alignment specifier.
S.Diag(Def->getLocation(), diag::err_alignas_missing_on_definition)
<< AA;
S.Diag(NewAttribute->getLocation(), diag::note_alignas_on_declaration)
<< AA;
NewAttributes.erase(NewAttributes.begin() + I);
--E;
continue;
}
} else if (isa<LoaderUninitializedAttr>(NewAttribute)) {
// If there is a C definition followed by a redeclaration with this
// attribute then there are two different definitions. In C++, prefer the
// standard diagnostics.
if (!S.getLangOpts().CPlusPlus) {
S.Diag(NewAttribute->getLocation(),
diag::err_loader_uninitialized_redeclaration);
S.Diag(Def->getLocation(), diag::note_previous_definition);
NewAttributes.erase(NewAttributes.begin() + I);
--E;
continue;
}
} else if (isa<SelectAnyAttr>(NewAttribute) &&
cast<VarDecl>(New)->isInline() &&
!cast<VarDecl>(New)->isInlineSpecified()) {
// Don't warn about applying selectany to implicitly inline variables.
// Older compilers and language modes would require the use of selectany
// to make such variables inline, and it would have no effect if we
// honored it.
++I;
continue;
} else if (isa<OMPDeclareVariantAttr>(NewAttribute)) {
// We allow OMP[Begin]DeclareVariantAttr to be added to declarations after
// definitions.
++I;
continue;
}
S.Diag(NewAttribute->getLocation(),
diag::warn_attribute_precede_definition);
S.Diag(Def->getLocation(), diag::note_previous_definition);
NewAttributes.erase(NewAttributes.begin() + I);
--E;
}
}
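// Illustrative sketch (hypothetical input): an attribute that first appears
// on a redeclaration after the definition, which the routine above drops
// with a warning:
//
//   void f(void) {}
//   __attribute__((noinline)) void f(void);
//   // warning: attribute declaration must precede definition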
static void diagnoseMissingConstinit(Sema &S, const VarDecl *InitDecl,
const ConstInitAttr *CIAttr,
bool AttrBeforeInit) {
SourceLocation InsertLoc = InitDecl->getInnerLocStart();
// Figure out a good way to write this specifier on the old declaration.
// FIXME: We should just use the spelling of CIAttr, but we don't preserve
// enough of the attribute list spelling information to extract that without
// heroics.
std::string SuitableSpelling;
if (S.getLangOpts().CPlusPlus20)
SuitableSpelling = std::string(
S.PP.getLastMacroWithSpelling(InsertLoc, {tok::kw_constinit}));
if (SuitableSpelling.empty() && S.getLangOpts().CPlusPlus11)
SuitableSpelling = std::string(S.PP.getLastMacroWithSpelling(
InsertLoc, {tok::l_square, tok::l_square,
S.PP.getIdentifierInfo("clang"), tok::coloncolon,
S.PP.getIdentifierInfo("require_constant_initialization"),
tok::r_square, tok::r_square}));
if (SuitableSpelling.empty())
SuitableSpelling = std::string(S.PP.getLastMacroWithSpelling(
InsertLoc, {tok::kw___attribute, tok::l_paren, tok::r_paren,
S.PP.getIdentifierInfo("require_constant_initialization"),
tok::r_paren, tok::r_paren}));
if (SuitableSpelling.empty() && S.getLangOpts().CPlusPlus20)
SuitableSpelling = "constinit";
if (SuitableSpelling.empty() && S.getLangOpts().CPlusPlus11)
SuitableSpelling = "[[clang::require_constant_initialization]]";
if (SuitableSpelling.empty())
SuitableSpelling = "__attribute__((require_constant_initialization))";
SuitableSpelling += " ";
if (AttrBeforeInit) {
// extern constinit int a;
// int a = 0; // error (missing 'constinit'), accepted as extension
assert(CIAttr->isConstinit() && "should not diagnose this for attribute");
S.Diag(InitDecl->getLocation(), diag::ext_constinit_missing)
<< InitDecl << FixItHint::CreateInsertion(InsertLoc, SuitableSpelling);
S.Diag(CIAttr->getLocation(), diag::note_constinit_specified_here);
} else {
// int a = 0;
// constinit extern int a; // error (missing 'constinit')
S.Diag(CIAttr->getLocation(),
CIAttr->isConstinit() ? diag::err_constinit_added_too_late
: diag::warn_require_const_init_added_too_late)
<< FixItHint::CreateRemoval(SourceRange(CIAttr->getLocation()));
S.Diag(InitDecl->getLocation(), diag::note_constinit_missing_here)
<< CIAttr->isConstinit()
<< FixItHint::CreateInsertion(InsertLoc, SuitableSpelling);
}
}
/// mergeDeclAttributes - Copy attributes from the Old decl to the New one.
void Sema::mergeDeclAttributes(NamedDecl *New, Decl *Old,
AvailabilityMergeKind AMK) {
if (UsedAttr *OldAttr = Old->getMostRecentDecl()->getAttr<UsedAttr>()) {
UsedAttr *NewAttr = OldAttr->clone(Context);
NewAttr->setInherited(true);
New->addAttr(NewAttr);
}
if (RetainAttr *OldAttr = Old->getMostRecentDecl()->getAttr<RetainAttr>()) {
RetainAttr *NewAttr = OldAttr->clone(Context);
NewAttr->setInherited(true);
New->addAttr(NewAttr);
}
if (!Old->hasAttrs() && !New->hasAttrs())
return;
// [dcl.constinit]p1:
// If the [constinit] specifier is applied to any declaration of a
// variable, it shall be applied to the initializing declaration.
const auto *OldConstInit = Old->getAttr<ConstInitAttr>();
const auto *NewConstInit = New->getAttr<ConstInitAttr>();
if (bool(OldConstInit) != bool(NewConstInit)) {
const auto *OldVD = cast<VarDecl>(Old);
auto *NewVD = cast<VarDecl>(New);
// Find the initializing declaration. Note that we might not have linked
// the new declaration into the redeclaration chain yet.
const VarDecl *InitDecl = OldVD->getInitializingDeclaration();
if (!InitDecl &&
(NewVD->hasInit() || NewVD->isThisDeclarationADefinition()))
InitDecl = NewVD;
if (InitDecl == NewVD) {
// This is the initializing declaration. If it would inherit 'constinit',
// that's ill-formed. (Note that we do not apply this to the attribute
// form).
if (OldConstInit && OldConstInit->isConstinit())
diagnoseMissingConstinit(*this, NewVD, OldConstInit,
/*AttrBeforeInit=*/true);
} else if (NewConstInit) {
// This is the first time we've been told that this declaration should
// have a constant initializer. If we already saw the initializing
// declaration, this is too late.
if (InitDecl && InitDecl != NewVD) {
diagnoseMissingConstinit(*this, InitDecl, NewConstInit,
/*AttrBeforeInit=*/false);
NewVD->dropAttr<ConstInitAttr>();
}
}
}
// Attributes declared post-definition are currently ignored.
checkNewAttributesAfterDef(*this, New, Old);
if (AsmLabelAttr *NewA = New->getAttr<AsmLabelAttr>()) {
if (AsmLabelAttr *OldA = Old->getAttr<AsmLabelAttr>()) {
if (!OldA->isEquivalent(NewA)) {
// This redeclaration changes __asm__ label.
Diag(New->getLocation(), diag::err_different_asm_label);
Diag(OldA->getLocation(), diag::note_previous_declaration);
}
} else if (Old->isUsed()) {
// This redeclaration adds an __asm__ label to a declaration that has
// already been ODR-used.
Diag(New->getLocation(), diag::err_late_asm_label_name)
<< isa<FunctionDecl>(Old) << New->getAttr<AsmLabelAttr>()->getRange();
}
}
// Re-declaration cannot add abi_tag's.
if (const auto *NewAbiTagAttr = New->getAttr<AbiTagAttr>()) {
if (const auto *OldAbiTagAttr = Old->getAttr<AbiTagAttr>()) {
for (const auto &NewTag : NewAbiTagAttr->tags()) {
if (!llvm::is_contained(OldAbiTagAttr->tags(), NewTag)) {
Diag(NewAbiTagAttr->getLocation(),
diag::err_new_abi_tag_on_redeclaration)
<< NewTag;
Diag(OldAbiTagAttr->getLocation(), diag::note_previous_declaration);
}
}
} else {
Diag(NewAbiTagAttr->getLocation(), diag::err_abi_tag_on_redeclaration);
Diag(Old->getLocation(), diag::note_previous_declaration);
}
}
// This redeclaration adds a section attribute.
if (New->hasAttr<SectionAttr>() && !Old->hasAttr<SectionAttr>()) {
if (auto *VD = dyn_cast<VarDecl>(New)) {
if (VD->isThisDeclarationADefinition() == VarDecl::DeclarationOnly) {
Diag(New->getLocation(), diag::warn_attribute_section_on_redeclaration);
Diag(Old->getLocation(), diag::note_previous_declaration);
}
}
}
// Redeclaration adds code-seg attribute.
const auto *NewCSA = New->getAttr<CodeSegAttr>();
if (NewCSA && !Old->hasAttr<CodeSegAttr>() &&
!NewCSA->isImplicit() && isa<CXXMethodDecl>(New)) {
Diag(New->getLocation(), diag::warn_mismatched_section)
<< 0 /*codeseg*/;
Diag(Old->getLocation(), diag::note_previous_declaration);
}
if (!Old->hasAttrs())
return;
bool foundAny = New->hasAttrs();
// Ensure that any moving of objects within the allocated map is done before
// we process them.
if (!foundAny) New->setAttrs(AttrVec());
for (auto *I : Old->specific_attrs<InheritableAttr>()) {
// Ignore deprecated/unavailable/availability attributes if requested.
AvailabilityMergeKind LocalAMK = AMK_None;
if (isa<DeprecatedAttr>(I) ||
isa<UnavailableAttr>(I) ||
isa<AvailabilityAttr>(I)) {
switch (AMK) {
case AMK_None:
continue;
case AMK_Redeclaration:
case AMK_Override:
case AMK_ProtocolImplementation:
case AMK_OptionalProtocolImplementation:
LocalAMK = AMK;
break;
}
}
// Already handled.
if (isa<UsedAttr>(I) || isa<RetainAttr>(I))
continue;
if (mergeDeclAttribute(*this, New, I, LocalAMK))
foundAny = true;
}
if (mergeAlignedAttrs(*this, New, Old))
foundAny = true;
if (!foundAny) New->dropAttrs();
}
/// mergeParamDeclAttributes - Copy attributes from the old parameter
/// to the new one.
static void mergeParamDeclAttributes(ParmVarDecl *newDecl,
const ParmVarDecl *oldDecl,
Sema &S) {
// C++11 [dcl.attr.depend]p2:
// The first declaration of a function shall specify the
// carries_dependency attribute for its declarator-id if any declaration
// of the function specifies the carries_dependency attribute.
const CarriesDependencyAttr *CDA = newDecl->getAttr<CarriesDependencyAttr>();
if (CDA && !oldDecl->hasAttr<CarriesDependencyAttr>()) {
S.Diag(CDA->getLocation(),
diag::err_carries_dependency_missing_on_first_decl) << 1/*Param*/;
// Find the first declaration of the parameter.
// FIXME: Should we build redeclaration chains for function parameters?
const FunctionDecl *FirstFD =
cast<FunctionDecl>(oldDecl->getDeclContext())->getFirstDecl();
const ParmVarDecl *FirstVD =
FirstFD->getParamDecl(oldDecl->getFunctionScopeIndex());
S.Diag(FirstVD->getLocation(),
diag::note_carries_dependency_missing_first_decl) << 1/*Param*/;
}
if (!oldDecl->hasAttrs())
return;
bool foundAny = newDecl->hasAttrs();
// Ensure that any moving of objects within the allocated map is
// done before we process them.
if (!foundAny) newDecl->setAttrs(AttrVec());
for (const auto *I : oldDecl->specific_attrs<InheritableParamAttr>()) {
if (!DeclHasAttr(newDecl, I)) {
InheritableAttr *newAttr =
cast<InheritableParamAttr>(I->clone(S.Context));
newAttr->setInherited(true);
newDecl->addAttr(newAttr);
foundAny = true;
}
}
if (!foundAny) newDecl->dropAttrs();
}
static bool EquivalentArrayTypes(QualType Old, QualType New,
const ASTContext &Ctx) {
auto NoSizeInfo = [&Ctx](QualType Ty) {
if (Ty->isIncompleteArrayType() || Ty->isPointerType())
return true;
if (const auto *VAT = Ctx.getAsVariableArrayType(Ty))
return VAT->getSizeModifier() == ArrayType::ArraySizeModifier::Star;
return false;
};
// `type[]` is equivalent to `type *` and `type[*]`.
if (NoSizeInfo(Old) && NoSizeInfo(New))
return true;
// Don't try to compare VLA sizes, unless one of them has the star modifier.
if (Old->isVariableArrayType() && New->isVariableArrayType()) {
const auto *OldVAT = Ctx.getAsVariableArrayType(Old);
const auto *NewVAT = Ctx.getAsVariableArrayType(New);
if ((OldVAT->getSizeModifier() == ArrayType::ArraySizeModifier::Star) ^
(NewVAT->getSizeModifier() == ArrayType::ArraySizeModifier::Star))
return false;
return true;
}
// Only compare the sizes; ignore size modifiers and CVR qualifiers.
if (Old->isConstantArrayType() && New->isConstantArrayType()) {
return Ctx.getAsConstantArrayType(Old)->getSize() ==
Ctx.getAsConstantArrayType(New)->getSize();
}
// Don't try to compare dependent-sized arrays.
if (Old->isDependentSizedArrayType() && New->isDependentSizedArrayType()) {
return true;
}
return Old == New;
}
static void mergeParamDeclTypes(ParmVarDecl *NewParam,
const ParmVarDecl *OldParam,
Sema &S) {
if (auto Oldnullability = OldParam->getType()->getNullability()) {
if (auto Newnullability = NewParam->getType()->getNullability()) {
if (*Oldnullability != *Newnullability) {
S.Diag(NewParam->getLocation(), diag::warn_mismatched_nullability_attr)
<< DiagNullabilityKind(
*Newnullability,
((NewParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
!= 0))
<< DiagNullabilityKind(
*Oldnullability,
((OldParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
!= 0));
S.Diag(OldParam->getLocation(), diag::note_previous_declaration);
}
} else {
QualType NewT = NewParam->getType();
NewT = S.Context.getAttributedType(
AttributedType::getNullabilityAttrKind(*Oldnullability),
NewT, NewT);
NewParam->setType(NewT);
}
}
const auto *OldParamDT = dyn_cast<DecayedType>(OldParam->getType());
const auto *NewParamDT = dyn_cast<DecayedType>(NewParam->getType());
if (OldParamDT && NewParamDT &&
OldParamDT->getPointeeType() == NewParamDT->getPointeeType()) {
QualType OldParamOT = OldParamDT->getOriginalType();
QualType NewParamOT = NewParamDT->getOriginalType();
if (!EquivalentArrayTypes(OldParamOT, NewParamOT, S.getASTContext())) {
S.Diag(NewParam->getLocation(), diag::warn_inconsistent_array_form)
<< NewParam << NewParamOT;
S.Diag(OldParam->getLocation(), diag::note_previous_declaration_as)
<< OldParamOT;
}
}
}
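// Illustrative sketch (hypothetical input): array parameters decay to
// pointers, but mismatched written bounds are still flagged via
// EquivalentArrayTypes above:
//
//   void f(int a[10]);
//   void f(int a[5]) {}   // -Warray-parameter: mismatched array bound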
namespace {
/// Used in MergeFunctionDecl to keep track of function parameters in
/// C.
struct GNUCompatibleParamWarning {
ParmVarDecl *OldParm;
ParmVarDecl *NewParm;
QualType PromotedType;
};
} // end anonymous namespace
// Determine whether the previous declaration was a definition, implicit
// declaration, or a declaration.
template <typename T>
static std::pair<diag::kind, SourceLocation>
getNoteDiagForInvalidRedeclaration(const T *Old, const T *New) {
diag::kind PrevDiag;
SourceLocation OldLocation = Old->getLocation();
if (Old->isThisDeclarationADefinition())
PrevDiag = diag::note_previous_definition;
else if (Old->isImplicit()) {
PrevDiag = diag::note_previous_implicit_declaration;
if (const auto *FD = dyn_cast<FunctionDecl>(Old)) {
if (FD->getBuiltinID())
PrevDiag = diag::note_previous_builtin_declaration;
}
if (OldLocation.isInvalid())
OldLocation = New->getLocation();
} else
PrevDiag = diag::note_previous_declaration;
return std::make_pair(PrevDiag, OldLocation);
}
/// canRedefineFunction - checks if a function can be redefined. Currently,
/// only extern inline functions can be redefined, and even then only in
/// GNU89 mode.
static bool canRedefineFunction(const FunctionDecl *FD,
const LangOptions& LangOpts) {
return ((FD->hasAttr<GNUInlineAttr>() || LangOpts.GNUInline) &&
!LangOpts.CPlusPlus &&
FD->isInlineSpecified() &&
FD->getStorageClass() == SC_Extern);
}
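// Illustrative sketch (hypothetical input, gnu89 mode): an extern inline
// function may be redefined, which canRedefineFunction permits:
//
//   extern inline int f(void) { return 0; }   // inline definition
//   int f(void) { return 1; }                 // externally visible definition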
const AttributedType *Sema::getCallingConvAttributedType(QualType T) const {
const AttributedType *AT = T->getAs<AttributedType>();
while (AT && !AT->isCallingConv())
AT = AT->getModifiedType()->getAs<AttributedType>();
return AT;
}
template <typename T>
static bool haveIncompatibleLanguageLinkages(const T *Old, const T *New) {
const DeclContext *DC = Old->getDeclContext();
if (DC->isRecord())
return false;
LanguageLinkage OldLinkage = Old->getLanguageLinkage();
if (OldLinkage == CXXLanguageLinkage && New->isInExternCContext())
return true;
if (OldLinkage == CLanguageLinkage && New->isInExternCXXContext())
return true;
return false;
}
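// Illustrative sketch (hypothetical input): redeclaring a function with a
// different language linkage, which this predicate detects:
//
//   extern "C" int g();
//   extern "C++" int g();   // error: different language linkage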
template<typename T> static bool isExternC(T *D) { return D->isExternC(); }
static bool isExternC(VarTemplateDecl *) { return false; }
static bool isExternC(FunctionTemplateDecl *) { return false; }
/// Check whether a redeclaration of an entity introduced by a
/// using-declaration is valid, given that we know it's not an overload
/// (nor a hidden tag declaration).
template<typename ExpectedDecl>
static bool checkUsingShadowRedecl(Sema &S, UsingShadowDecl *OldS,
ExpectedDecl *New) {
// C++11 [basic.scope.declarative]p4:
// Given a set of declarations in a single declarative region, each of
// which specifies the same unqualified name,
// -- they shall all refer to the same entity, or all refer to functions
// and function templates; or
// -- exactly one declaration shall declare a class name or enumeration
// name that is not a typedef name and the other declarations shall all
// refer to the same variable or enumerator, or all refer to functions
// and function templates; in this case the class name or enumeration
// name is hidden (3.3.10).
// C++11 [namespace.udecl]p14:
// If a function declaration in namespace scope or block scope has the
// same name and the same parameter-type-list as a function introduced
// by a using-declaration, and the declarations do not declare the same
// function, the program is ill-formed.
auto *Old = dyn_cast<ExpectedDecl>(OldS->getTargetDecl());
if (Old &&
!Old->getDeclContext()->getRedeclContext()->Equals(
New->getDeclContext()->getRedeclContext()) &&
!(isExternC(Old) && isExternC(New)))
Old = nullptr;
if (!Old) {
S.Diag(New->getLocation(), diag::err_using_decl_conflict_reverse);
S.Diag(OldS->getTargetDecl()->getLocation(), diag::note_using_decl_target);
S.Diag(OldS->getIntroducer()->getLocation(), diag::note_using_decl) << 0;
return true;
}
return false;
}
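// Illustrative sketch (hypothetical input): a declaration that conflicts
// with the target of a using-declaration already in scope:
//
//   namespace M { void h(); }
//   using M::h;
//   void h();   // error: conflicts with target of using declaration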
static bool hasIdenticalPassObjectSizeAttrs(const FunctionDecl *A,
const FunctionDecl *B) {
assert(A->getNumParams() == B->getNumParams());
auto AttrEq = [](const ParmVarDecl *A, const ParmVarDecl *B) {
const auto *AttrA = A->getAttr<PassObjectSizeAttr>();
const auto *AttrB = B->getAttr<PassObjectSizeAttr>();
if (AttrA == AttrB)
return true;
return AttrA && AttrB && AttrA->getType() == AttrB->getType() &&
AttrA->isDynamic() == AttrB->isDynamic();
};
return std::equal(A->param_begin(), A->param_end(), B->param_begin(), AttrEq);
}
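// Illustrative sketch (hypothetical input): pass_object_size attributes must
// match exactly across redeclarations; this predicate feeds the diagnostic
// in MergeFunctionDecl below:
//
//   void f(char *p __attribute__((pass_object_size(0))));
//   void f(char *p);   // error: pass_object_size attributes differ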
/// If necessary, adjust the semantic declaration context for a qualified
/// declaration to name the correct inline namespace within the qualifier.
static void adjustDeclContextForDeclaratorDecl(DeclaratorDecl *NewD,
DeclaratorDecl *OldD) {
// The only case where we need to update the DeclContext is when
// redeclaration lookup for a qualified name finds a declaration
// in an inline namespace within the context named by the qualifier:
//
// inline namespace N { int f(); }
// int ::f(); // Sema DC needs adjusting from :: to N::.
//
// For unqualified declarations, the semantic context *can* change
// along the redeclaration chain (for local extern declarations,
// extern "C" declarations, and friend declarations in particular).
if (!NewD->getQualifier())
return;
// NewD is probably already in the right context.
auto *NamedDC = NewD->getDeclContext()->getRedeclContext();
auto *SemaDC = OldD->getDeclContext()->getRedeclContext();
if (NamedDC->Equals(SemaDC))
return;
assert((NamedDC->InEnclosingNamespaceSetOf(SemaDC) ||
NewD->isInvalidDecl() || OldD->isInvalidDecl()) &&
"unexpected context for redeclaration");
auto *LexDC = NewD->getLexicalDeclContext();
auto FixSemaDC = [=](NamedDecl *D) {
if (!D)
return;
D->setDeclContext(SemaDC);
D->setLexicalDeclContext(LexDC);
};
FixSemaDC(NewD);
if (auto *FD = dyn_cast<FunctionDecl>(NewD))
FixSemaDC(FD->getDescribedFunctionTemplate());
else if (auto *VD = dyn_cast<VarDecl>(NewD))
FixSemaDC(VD->getDescribedVarTemplate());
}
/// MergeFunctionDecl - We just parsed a function 'New' from
/// declarator D which has the same name and scope as a previous
/// declaration 'Old'. Figure out how to resolve this situation,
/// merging decls or emitting diagnostics as appropriate.
///
/// In C++, New and Old must be declarations that are not
/// overloaded. Use IsOverload to determine whether New and Old are
/// overloaded, and to select the Old declaration that New should be
/// merged with.
///
/// Returns true if there was an error, false otherwise.
bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S,
bool MergeTypeWithOld, bool NewDeclIsDefn) {
// Verify the old decl was also a function.
FunctionDecl *Old = OldD->getAsFunction();
if (!Old) {
if (UsingShadowDecl *Shadow = dyn_cast<UsingShadowDecl>(OldD)) {
if (New->getFriendObjectKind()) {
Diag(New->getLocation(), diag::err_using_decl_friend);
Diag(Shadow->getTargetDecl()->getLocation(),
diag::note_using_decl_target);
Diag(Shadow->getIntroducer()->getLocation(), diag::note_using_decl)
<< 0;
return true;
}
// Check whether the two declarations might declare the same function or
// function template.
if (FunctionTemplateDecl *NewTemplate =
New->getDescribedFunctionTemplate()) {
if (checkUsingShadowRedecl<FunctionTemplateDecl>(*this, Shadow,
NewTemplate))
return true;
OldD = Old = cast<FunctionTemplateDecl>(Shadow->getTargetDecl())
->getAsFunction();
} else {
if (checkUsingShadowRedecl<FunctionDecl>(*this, Shadow, New))
return true;
OldD = Old = cast<FunctionDecl>(Shadow->getTargetDecl());
}
} else {
Diag(New->getLocation(), diag::err_redefinition_different_kind)
<< New->getDeclName();
notePreviousDefinition(OldD, New->getLocation());
return true;
}
}
// If the old declaration was found in an inline namespace and the new
// declaration was qualified, update the DeclContext to match.
adjustDeclContextForDeclaratorDecl(New, Old);
// If the old declaration is invalid, just give up here.
if (Old->isInvalidDecl())
return true;
// Disallow redeclaration of some builtins.
if (!getASTContext().canBuiltinBeRedeclared(Old)) {
Diag(New->getLocation(), diag::err_builtin_redeclare) << Old->getDeclName();
Diag(Old->getLocation(), diag::note_previous_builtin_declaration)
<< Old << Old->getType();
return true;
}
diag::kind PrevDiag;
SourceLocation OldLocation;
std::tie(PrevDiag, OldLocation) =
getNoteDiagForInvalidRedeclaration(Old, New);
// Don't complain about this if we're in GNU89 mode and the old function
// is an extern inline function.
// Don't complain about specializations. They are not supposed to have
// storage classes.
if (!isa<CXXMethodDecl>(New) && !isa<CXXMethodDecl>(Old) &&
New->getStorageClass() == SC_Static &&
Old->hasExternalFormalLinkage() &&
!New->getTemplateSpecializationInfo() &&
!canRedefineFunction(Old, getLangOpts())) {
if (getLangOpts().MicrosoftExt) {
Diag(New->getLocation(), diag::ext_static_non_static) << New;
Diag(OldLocation, PrevDiag);
} else {
Diag(New->getLocation(), diag::err_static_non_static) << New;
Diag(OldLocation, PrevDiag);
return true;
}
}
if (const auto *ILA = New->getAttr<InternalLinkageAttr>())
if (!Old->hasAttr<InternalLinkageAttr>()) {
Diag(New->getLocation(), diag::err_attribute_missing_on_first_decl)
<< ILA;
Diag(Old->getLocation(), diag::note_previous_declaration);
New->dropAttr<InternalLinkageAttr>();
}
if (auto *EA = New->getAttr<ErrorAttr>()) {
if (!Old->hasAttr<ErrorAttr>()) {
Diag(EA->getLocation(), diag::err_attribute_missing_on_first_decl) << EA;
Diag(Old->getLocation(), diag::note_previous_declaration);
New->dropAttr<ErrorAttr>();
}
}
if (CheckRedeclarationInModule(New, Old))
return true;
if (!getLangOpts().CPlusPlus) {
bool OldOvl = Old->hasAttr<OverloadableAttr>();
if (OldOvl != New->hasAttr<OverloadableAttr>() && !Old->isImplicit()) {
Diag(New->getLocation(), diag::err_attribute_overloadable_mismatch)
<< New << OldOvl;
// Try our best to find a decl that actually has the overloadable
// attribute for the note. In most cases (e.g. programs with only one
// broken declaration/definition), this won't matter.
//
// FIXME: We could do this if we juggled some extra state in
// OverloadableAttr, rather than just removing it.
const Decl *DiagOld = Old;
if (OldOvl) {
auto OldIter = llvm::find_if(Old->redecls(), [](const Decl *D) {
const auto *A = D->getAttr<OverloadableAttr>();
return A && !A->isImplicit();
});
// If we've implicitly added *all* of the overloadable attrs to this
// chain, emitting a "previous redecl" note is pointless.
DiagOld = OldIter == Old->redecls_end() ? nullptr : *OldIter;
}
if (DiagOld)
Diag(DiagOld->getLocation(),
diag::note_attribute_overloadable_prev_overload)
<< OldOvl;
if (OldOvl)
New->addAttr(OverloadableAttr::CreateImplicit(Context));
else
New->dropAttr<OverloadableAttr>();
}
}
// If a function is first declared with a calling convention, but is later
// declared or defined without one, all following decls assume the calling
// convention of the first.
//
// It's OK if a function is first declared without a calling convention,
// but is later declared or defined with the default calling convention.
//
// To test if either decl has an explicit calling convention, we look for
// AttributedType sugar nodes on the type as written. If they are missing or
// were canonicalized away, we assume the calling convention was implicit.
//
// Note also that we DO NOT return at this point, because we still have
// other tests to run.
QualType OldQType = Context.getCanonicalType(Old->getType());
QualType NewQType = Context.getCanonicalType(New->getType());
const FunctionType *OldType = cast<FunctionType>(OldQType);
const FunctionType *NewType = cast<FunctionType>(NewQType);
FunctionType::ExtInfo OldTypeInfo = OldType->getExtInfo();
FunctionType::ExtInfo NewTypeInfo = NewType->getExtInfo();
bool RequiresAdjustment = false;
if (OldTypeInfo.getCC() != NewTypeInfo.getCC()) {
FunctionDecl *First = Old->getFirstDecl();
const FunctionType *FT =
First->getType().getCanonicalType()->castAs<FunctionType>();
FunctionType::ExtInfo FI = FT->getExtInfo();
bool NewCCExplicit = getCallingConvAttributedType(New->getType());
if (!NewCCExplicit) {
// Inherit the CC from the previous declaration if it was specified
// there but not here.
NewTypeInfo = NewTypeInfo.withCallingConv(OldTypeInfo.getCC());
RequiresAdjustment = true;
} else if (Old->getBuiltinID()) {
// The builtin attribute isn't propagated to the new declaration yet at
// this point, so we check whether the old one is a builtin. Calling
// conventions on a builtin aren't really useful, and setting a default
// calling convention and cdecl'ing some builtin redeclarations is common,
// so warn about and ignore the calling convention on the redeclaration.
Diag(New->getLocation(), diag::warn_cconv_unsupported)
<< FunctionType::getNameForCallConv(NewTypeInfo.getCC())
<< (int)CallingConventionIgnoredReason::BuiltinFunction;
NewTypeInfo = NewTypeInfo.withCallingConv(OldTypeInfo.getCC());
RequiresAdjustment = true;
} else {
// Calling conventions aren't compatible, so complain.
bool FirstCCExplicit = getCallingConvAttributedType(First->getType());
Diag(New->getLocation(), diag::err_cconv_change)
<< FunctionType::getNameForCallConv(NewTypeInfo.getCC())
<< !FirstCCExplicit
<< (!FirstCCExplicit ? "" :
FunctionType::getNameForCallConv(FI.getCC()));
// Put the note on the first decl, since it is the one that matters.
Diag(First->getLocation(), diag::note_previous_declaration);
return true;
}
}
// FIXME: diagnose the other way around?
if (OldTypeInfo.getNoReturn() && !NewTypeInfo.getNoReturn()) {
NewTypeInfo = NewTypeInfo.withNoReturn(true);
RequiresAdjustment = true;
}
// Merge regparm attribute.
if (OldTypeInfo.getHasRegParm() != NewTypeInfo.getHasRegParm() ||
OldTypeInfo.getRegParm() != NewTypeInfo.getRegParm()) {
if (NewTypeInfo.getHasRegParm()) {
Diag(New->getLocation(), diag::err_regparm_mismatch)
<< NewType->getRegParmType()
<< OldType->getRegParmType();
Diag(OldLocation, diag::note_previous_declaration);
return true;
}
NewTypeInfo = NewTypeInfo.withRegParm(OldTypeInfo.getRegParm());
RequiresAdjustment = true;
}
// Merge ns_returns_retained attribute.
if (OldTypeInfo.getProducesResult() != NewTypeInfo.getProducesResult()) {
if (NewTypeInfo.getProducesResult()) {
Diag(New->getLocation(), diag::err_function_attribute_mismatch)
<< "'ns_returns_retained'";
Diag(OldLocation, diag::note_previous_declaration);
return true;
}
NewTypeInfo = NewTypeInfo.withProducesResult(true);
RequiresAdjustment = true;
}
if (OldTypeInfo.getNoCallerSavedRegs() !=
NewTypeInfo.getNoCallerSavedRegs()) {
if (NewTypeInfo.getNoCallerSavedRegs()) {
AnyX86NoCallerSavedRegistersAttr *Attr =
New->getAttr<AnyX86NoCallerSavedRegistersAttr>();
Diag(New->getLocation(), diag::err_function_attribute_mismatch) << Attr;
Diag(OldLocation, diag::note_previous_declaration);
return true;
}
NewTypeInfo = NewTypeInfo.withNoCallerSavedRegs(true);
RequiresAdjustment = true;
}
if (RequiresAdjustment) {
const FunctionType *AdjustedType = New->getType()->getAs<FunctionType>();
AdjustedType = Context.adjustFunctionType(AdjustedType, NewTypeInfo);
New->setType(QualType(AdjustedType, 0));
NewQType = Context.getCanonicalType(New->getType());
}
// If this redeclaration makes the function inline, we may need to add it to
// UndefinedButUsed.
if (!Old->isInlined() && New->isInlined() &&
!New->hasAttr<GNUInlineAttr>() &&
!getLangOpts().GNUInline &&
Old->isUsed(false) &&
!Old->isDefined() && !New->isThisDeclarationADefinition())
UndefinedButUsed.insert(std::make_pair(Old->getCanonicalDecl(),
SourceLocation()));
// If this redeclaration makes it newly gnu_inline, we don't want to warn
// about it.
if (New->hasAttr<GNUInlineAttr>() &&
Old->isInlined() && !Old->hasAttr<GNUInlineAttr>()) {
UndefinedButUsed.erase(Old->getCanonicalDecl());
}
// If pass_object_size params don't match up perfectly, this isn't a valid
// redeclaration.
if (Old->getNumParams() > 0 && Old->getNumParams() == New->getNumParams() &&
!hasIdenticalPassObjectSizeAttrs(Old, New)) {
Diag(New->getLocation(), diag::err_different_pass_object_size_params)
<< New->getDeclName();
Diag(OldLocation, PrevDiag) << Old << Old->getType();
return true;
}
if (getLangOpts().CPlusPlus) {
// C++1z [over.load]p2
// Certain function declarations cannot be overloaded:
// -- Function declarations that differ only in the return type,
// the exception specification, or both cannot be overloaded.
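// For example (illustrative): 'int f();' followed by 'long f();' differs
// only in the return type and is diagnosed below rather than treated as an
// overload.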
// Check the exception specifications match. This may recompute the type of
// both Old and New if it resolved exception specifications, so grab the
// types again after this. Because this updates the type, we do this before
// any of the other checks below, which may update the "de facto" NewQType
// but do not necessarily update the type of New.
if (CheckEquivalentExceptionSpec(Old, New))
return true;
OldQType = Context.getCanonicalType(Old->getType());
NewQType = Context.getCanonicalType(New->getType());
// Go back to the type source info to compare the declared return types,
// per C++1y [dcl.type.auto]p13:
// Redeclarations or specializations of a function or function template
// with a declared return type that uses a placeholder type shall also
// use that placeholder, not a deduced type.
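// For example (illustrative):
//   auto f();   // declared with a placeholder return type
//   int f();    // ill-formed: the redeclaration must also use 'auto'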
QualType OldDeclaredReturnType = Old->getDeclaredReturnType();
QualType NewDeclaredReturnType = New->getDeclaredReturnType();
if (!Context.hasSameType(OldDeclaredReturnType, NewDeclaredReturnType) &&
canFullyTypeCheckRedeclaration(New, Old, NewDeclaredReturnType,
OldDeclaredReturnType)) {
QualType ResQT;
if (NewDeclaredReturnType->isObjCObjectPointerType() &&
OldDeclaredReturnType->isObjCObjectPointerType())
// FIXME: This does the wrong thing for a deduced return type.
ResQT = Context.mergeObjCGCQualifiers(NewQType, OldQType);
if (ResQT.isNull()) {
if (New->isCXXClassMember() && New->isOutOfLine())
Diag(New->getLocation(), diag::err_member_def_does_not_match_ret_type)
<< New << New->getReturnTypeSourceRange();
else
Diag(New->getLocation(), diag::err_ovl_diff_return_type)
<< New->getReturnTypeSourceRange();
Diag(OldLocation, PrevDiag) << Old << Old->getType()
<< Old->getReturnTypeSourceRange();
return true;
} else
NewQType = ResQT;
}
QualType OldReturnType = OldType->getReturnType();
QualType NewReturnType = cast<FunctionType>(NewQType)->getReturnType();
if (OldReturnType != NewReturnType) {
// If this function has a deduced return type and has already been
// defined, copy the deduced value from the old declaration.
AutoType *OldAT = Old->getReturnType()->getContainedAutoType();
if (OldAT && OldAT->isDeduced()) {
QualType DT = OldAT->getDeducedType();
if (DT.isNull()) {
New->setType(SubstAutoTypeDependent(New->getType()));
NewQType = Context.getCanonicalType(SubstAutoTypeDependent(NewQType));
} else {
New->setType(SubstAutoType(New->getType(), DT));
NewQType = Context.getCanonicalType(SubstAutoType(NewQType, DT));
}
}
}
const CXXMethodDecl *OldMethod = dyn_cast<CXXMethodDecl>(Old);
CXXMethodDecl *NewMethod = dyn_cast<CXXMethodDecl>(New);
if (OldMethod && NewMethod) {
// Preserve triviality.
NewMethod->setTrivial(OldMethod->isTrivial());
// MSVC allows explicit template specialization at class scope:
// 2 CXXMethodDecls referring to the same function will be injected.
// We don't want a redeclaration error.
bool IsClassScopeExplicitSpecialization =
OldMethod->isFunctionTemplateSpecialization() &&
NewMethod->isFunctionTemplateSpecialization();
bool isFriend = NewMethod->getFriendObjectKind();
if (!isFriend && NewMethod->getLexicalDeclContext()->isRecord() &&
!IsClassScopeExplicitSpecialization) {
// -- Member function declarations with the same name and the
// same parameter types cannot be overloaded if any of them
// is a static member function declaration.
if (OldMethod->isStatic() != NewMethod->isStatic()) {
Diag(New->getLocation(), diag::err_ovl_static_nonstatic_member);
Diag(OldLocation, PrevDiag) << Old << Old->getType();
return true;
}
// C++ [class.mem]p1:
// [...] A member shall not be declared twice in the
// member-specification, except that a nested class or member
// class template can be declared and then later defined.
if (!inTemplateInstantiation()) {
unsigned NewDiag;
if (isa<CXXConstructorDecl>(OldMethod))
NewDiag = diag::err_constructor_redeclared;
else if (isa<CXXDestructorDecl>(NewMethod))
NewDiag = diag::err_destructor_redeclared;
else if (isa<CXXConversionDecl>(NewMethod))
NewDiag = diag::err_conv_function_redeclared;
else
NewDiag = diag::err_member_redeclared;
Diag(New->getLocation(), NewDiag);
} else {
Diag(New->getLocation(), diag::err_member_redeclared_in_instantiation)
<< New << New->getType();
}
Diag(OldLocation, PrevDiag) << Old << Old->getType();
return true;
// Complain if this is an explicit declaration of a special
// member that was initially declared implicitly.
//
// As an exception, it's okay to befriend such methods in order
// to permit the implicit constructor/destructor/operator calls.
} else if (OldMethod->isImplicit()) {
if (isFriend) {
NewMethod->setImplicit();
} else {
Diag(NewMethod->getLocation(),
diag::err_definition_of_implicitly_declared_member)
<< New << getSpecialMember(OldMethod);
return true;
}
} else if (OldMethod->getFirstDecl()->isExplicitlyDefaulted() && !isFriend) {
Diag(NewMethod->getLocation(),
diag::err_definition_of_explicitly_defaulted_member)
<< getSpecialMember(OldMethod);
return true;
}
}
// C++11 [dcl.attr.noreturn]p1:
// The first declaration of a function shall specify the noreturn
// attribute if any declaration of that function specifies the noreturn
// attribute.
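// For example (illustrative):
//   void f();
//   [[noreturn]] void f();   // error: attribute missing on the first decl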
if (const auto *NRA = New->getAttr<CXX11NoReturnAttr>())
if (!Old->hasAttr<CXX11NoReturnAttr>()) {
Diag(NRA->getLocation(), diag::err_attribute_missing_on_first_decl)
<< NRA;
Diag(Old->getLocation(), diag::note_previous_declaration);
}
// C++11 [dcl.attr.depend]p2:
// The first declaration of a function shall specify the
// carries_dependency attribute for its declarator-id if any declaration
// of the function specifies the carries_dependency attribute.
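// For example (illustrative):
//   int *g();
//   [[carries_dependency]] int *g();   // error: missing on the first decl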
const CarriesDependencyAttr *CDA = New->getAttr<CarriesDependencyAttr>();
if (CDA && !Old->hasAttr<CarriesDependencyAttr>()) {
Diag(CDA->getLocation(),
diag::err_carries_dependency_missing_on_first_decl) << 0/*Function*/;
Diag(Old->getFirstDecl()->getLocation(),
diag::note_carries_dependency_missing_first_decl) << 0/*Function*/;
}
// (C++98 8.3.5p3):
// All declarations for a function shall agree exactly in both the
// return type and the parameter-type-list.
// We also want to respect all the extended bits except noreturn.
// noreturn should now match unless the old type info didn't have it.
QualType OldQTypeForComparison = OldQType;
if (!OldTypeInfo.getNoReturn() && NewTypeInfo.getNoReturn()) {
auto *OldType = OldQType->castAs<FunctionProtoType>();
const FunctionType *OldTypeForComparison
= Context.adjustFunctionType(OldType, OldTypeInfo.withNoReturn(true));
OldQTypeForComparison = QualType(OldTypeForComparison, 0);
assert(OldQTypeForComparison.isCanonical());
}
if (haveIncompatibleLanguageLinkages(Old, New)) {
// As a special case, retain the language linkage from previous
// declarations of a friend function as an extension.
//
// This liberal interpretation of C++ [class.friend]p3 matches GCC/MSVC
// and is useful because there's otherwise no way to specify language
// linkage within class scope.
//
// Check cautiously as the friend object kind isn't yet complete.
if (New->getFriendObjectKind() != Decl::FOK_None) {
Diag(New->getLocation(), diag::ext_retained_language_linkage) << New;
Diag(OldLocation, PrevDiag);
} else {
Diag(New->getLocation(), diag::err_different_language_linkage) << New;
Diag(OldLocation, PrevDiag);
return true;
}
}
// If the function types are compatible, merge the declarations. Ignore the
// exception specifier because it was already checked above in
// CheckEquivalentExceptionSpec, and we don't want follow-on diagnostics
// about incompatible types under -fms-compatibility.
if (Context.hasSameFunctionTypeIgnoringExceptionSpec(OldQTypeForComparison,
NewQType))
return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
// If the types are imprecise (due to dependent constructs in friends or
// local extern declarations), it's OK if they differ. We'll check again
// during instantiation.
if (!canFullyTypeCheckRedeclaration(New, Old, NewQType, OldQType))
return false;
// Fall through for conflicting redeclarations and redefinitions.
}
// C: Function types need to be compatible, not identical. This handles
// duplicate function decls like "void f(int); void f(enum X);" properly.
if (!getLangOpts().CPlusPlus) {
// C99 6.7.5.3p15: ...If one type has a parameter type list and the other
// type is specified by a function definition that contains a (possibly
// empty) identifier list, both shall agree in the number of parameters
// and the type of each parameter shall be compatible with the type that
// results from the application of default argument promotions to the
// type of the corresponding identifier. ...
// This cannot be handled by ASTContext::typesAreCompatible() because that
// doesn't know whether the function type is for a definition or not when
// eventually calling ASTContext::mergeFunctionTypes(). The only situation
// we need to cover here is checking that the numbers of parameters agree,
// as the default argument promotion rules were already checked by
// ASTContext::typesAreCompatible().
if (Old->hasPrototype() && !New->hasWrittenPrototype() && NewDeclIsDefn &&
Old->getNumParams() != New->getNumParams() && !Old->isImplicit()) {
if (Old->hasInheritedPrototype())
Old = Old->getCanonicalDecl();
Diag(New->getLocation(), diag::err_conflicting_types) << New;
Diag(Old->getLocation(), PrevDiag) << Old << Old->getType();
return true;
}
// If we are merging two functions where only one of them has a prototype,
// we may have enough information to decide to issue a diagnostic that the
// function without a prototype will change behavior in C2x. This handles
// cases like:
// void i(); void i(int j);
// void i(int j); void i();
// void i(); void i(int j) {}
// See ActOnFinishFunctionBody() for other cases of the behavior change
// diagnostic. See GetFullTypeForDeclarator() for handling of a function
// type without a prototype.
if (New->hasWrittenPrototype() != Old->hasWrittenPrototype() &&
!New->isImplicit() && !Old->isImplicit()) {
const FunctionDecl *WithProto, *WithoutProto;
if (New->hasWrittenPrototype()) {
WithProto = New;
WithoutProto = Old;
} else {
WithProto = Old;
WithoutProto = New;
}
if (WithProto->getNumParams() != 0) {
if (WithoutProto->getBuiltinID() == 0 && !WithoutProto->isImplicit()) {
// The one without the prototype will be changing behavior in C2x, so
// warn about that one so long as it's a user-visible declaration.
bool IsWithoutProtoADef = false, IsWithProtoADef = false;
if (WithoutProto == New)
IsWithoutProtoADef = NewDeclIsDefn;
else
IsWithProtoADef = NewDeclIsDefn;
Diag(WithoutProto->getLocation(),
diag::warn_non_prototype_changes_behavior)
<< IsWithoutProtoADef << (WithoutProto->getNumParams() ? 0 : 1)
<< (WithoutProto == Old) << IsWithProtoADef;
// The reason the one without the prototype will be changing behavior
// is because of the one with the prototype, so note that so long as
// it's a user-visible declaration. There is one exception to this:
// when the new declaration is a definition without a prototype, the
// old declaration with a prototype is not the cause of the issue,
// and that does not need to be noted because the one with a
// prototype will not change behavior in C2x.
if (WithProto->getBuiltinID() == 0 && !WithProto->isImplicit() &&
!IsWithoutProtoADef)
Diag(WithProto->getLocation(), diag::note_conflicting_prototype);
}
}
}
if (Context.typesAreCompatible(OldQType, NewQType)) {
const FunctionType *OldFuncType = OldQType->getAs<FunctionType>();
const FunctionType *NewFuncType = NewQType->getAs<FunctionType>();
const FunctionProtoType *OldProto = nullptr;
if (MergeTypeWithOld && isa<FunctionNoProtoType>(NewFuncType) &&
(OldProto = dyn_cast<FunctionProtoType>(OldFuncType))) {
// The old declaration provided a function prototype, but the
// new declaration does not. Merge in the prototype.
assert(!OldProto->hasExceptionSpec() && "Exception spec in C");
NewQType = Context.getFunctionType(NewFuncType->getReturnType(),
OldProto->getParamTypes(),
OldProto->getExtProtoInfo());
New->setType(NewQType);
New->setHasInheritedPrototype();
// Synthesize parameters with the same types.
SmallVector<ParmVarDecl *, 16> Params;
for (const auto &ParamType : OldProto->param_types()) {
ParmVarDecl *Param = ParmVarDecl::Create(
Context, New, SourceLocation(), SourceLocation(), nullptr,
ParamType, /*TInfo=*/nullptr, SC_None, nullptr);
Param->setScopeInfo(0, Params.size());
Param->setImplicit();
Params.push_back(Param);
}
New->setParams(Params);
}
return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
}
}
// Check if the function types are compatible when pointer size address
// spaces are ignored.
if (Context.hasSameFunctionTypeIgnoringPtrSizes(OldQType, NewQType))
return false;
// GNU C permits a K&R definition to follow a prototype declaration
// if the declared types of the parameters in the K&R definition
// match the types in the prototype declaration, even when the
// promoted types of the parameters from the K&R definition differ
// from the types in the prototype. GCC then keeps the types from
// the prototype.
//
// If a variadic prototype is followed by a non-variadic K&R definition,
// the K&R definition becomes variadic. This is sort of an edge case, but
// it's legal per the standard depending on how you read C99 6.7.5.3p15 and
// C99 6.9.1p8.
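// For example (illustrative), GNU C accepts:
//   int f(const char *, ...);   // variadic prototype
//   int f(fmt)                  // non-variadic K&R definition...
//   const char *fmt;
//   { return 0; }               // ...which is treated as variadic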
if (!getLangOpts().CPlusPlus &&
Old->hasPrototype() && !New->hasPrototype() &&
New->getType()->getAs<FunctionProtoType>() &&
Old->getNumParams() == New->getNumParams()) {
SmallVector<QualType, 16> ArgTypes;
SmallVector<GNUCompatibleParamWarning, 16> Warnings;
const FunctionProtoType *OldProto
= Old->getType()->getAs<FunctionProtoType>();
const FunctionProtoType *NewProto
= New->getType()->getAs<FunctionProtoType>();
// Determine whether this is the GNU C extension.
QualType MergedReturn = Context.mergeTypes(OldProto->getReturnType(),
NewProto->getReturnType());
bool LooseCompatible = !MergedReturn.isNull();
for (unsigned Idx = 0, End = Old->getNumParams();
LooseCompatible && Idx != End; ++Idx) {
ParmVarDecl *OldParm = Old->getParamDecl(Idx);
ParmVarDecl *NewParm = New->getParamDecl(Idx);
if (Context.typesAreCompatible(OldParm->getType(),
NewProto->getParamType(Idx))) {
ArgTypes.push_back(NewParm->getType());
} else if (Context.typesAreCompatible(OldParm->getType(),
NewParm->getType(),
/*CompareUnqualified=*/true)) {
GNUCompatibleParamWarning Warn = { OldParm, NewParm,
NewProto->getParamType(Idx) };
Warnings.push_back(Warn);
ArgTypes.push_back(NewParm->getType());
} else
LooseCompatible = false;
}
if (LooseCompatible) {
for (unsigned Warn = 0; Warn < Warnings.size(); ++Warn) {
Diag(Warnings[Warn].NewParm->getLocation(),
diag::ext_param_promoted_not_compatible_with_prototype)
<< Warnings[Warn].PromotedType
<< Warnings[Warn].OldParm->getType();
if (Warnings[Warn].OldParm->getLocation().isValid())
Diag(Warnings[Warn].OldParm->getLocation(),
diag::note_previous_declaration);
}
if (MergeTypeWithOld)
New->setType(Context.getFunctionType(MergedReturn, ArgTypes,
OldProto->getExtProtoInfo()));
return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
}
// Fall through to diagnose conflicting types.
}
// A function that has already been declared has been redeclared or
// defined with a different type; show an appropriate diagnostic.
// If the previous declaration was an implicitly-generated builtin
// declaration, then at the very least we should use a specialized note.
unsigned BuiltinID;
if (Old->isImplicit() && (BuiltinID = Old->getBuiltinID())) {
// If it's actually a library-defined builtin function like 'malloc'
// or 'printf', just warn about the incompatible redeclaration.
if (Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) {
Diag(New->getLocation(), diag::warn_redecl_library_builtin) << New;
Diag(OldLocation, diag::note_previous_builtin_declaration)
<< Old << Old->getType();
return false;
}
PrevDiag = diag::note_previous_builtin_declaration;
}
Diag(New->getLocation(), diag::err_conflicting_types) << New->getDeclName();
Diag(OldLocation, PrevDiag) << Old << Old->getType();
return true;
}
/// Completes the merge of two function declarations that are
/// known to be compatible.
///
/// This routine handles the merging of attributes and other
/// properties of function declarations from the old declaration to
/// the new declaration, once we know that New is in fact a
/// redeclaration of Old.
///
/// \returns false
bool Sema::MergeCompatibleFunctionDecls(FunctionDecl *New, FunctionDecl *Old,
Scope *S, bool MergeTypeWithOld) {
// Merge the attributes
mergeDeclAttributes(New, Old);
// Merge "pure" flag.
if (Old->isPure())
New->setPure();
// Merge "used" flag.
if (Old->getMostRecentDecl()->isUsed(false))
New->setIsUsed();
// Merge attributes from the parameters. These can mismatch with K&R
// declarations.
if (New->getNumParams() == Old->getNumParams())
for (unsigned i = 0, e = New->getNumParams(); i != e; ++i) {
ParmVarDecl *NewParam = New->getParamDecl(i);
ParmVarDecl *OldParam = Old->getParamDecl(i);
mergeParamDeclAttributes(NewParam, OldParam, *this);
mergeParamDeclTypes(NewParam, OldParam, *this);
}
if (getLangOpts().CPlusPlus)
return MergeCXXFunctionDecl(New, Old, S);
// Merge the function types so that we get the composite types for the return
// and argument types. Per C11 6.2.7/4, only update the type if the old decl
// was visible.
QualType Merged = Context.mergeTypes(Old->getType(), New->getType());
if (!Merged.isNull() && MergeTypeWithOld)
New->setType(Merged);
return false;
}
void Sema::mergeObjCMethodDecls(ObjCMethodDecl *newMethod,
ObjCMethodDecl *oldMethod) {
// Merge the attributes, including deprecated/unavailable
AvailabilityMergeKind MergeKind =
isa<ObjCProtocolDecl>(oldMethod->getDeclContext())
? (oldMethod->isOptional() ? AMK_OptionalProtocolImplementation
: AMK_ProtocolImplementation)
: isa<ObjCImplDecl>(newMethod->getDeclContext()) ? AMK_Redeclaration
: AMK_Override;
mergeDeclAttributes(newMethod, oldMethod, MergeKind);
// Merge attributes from the parameters.
ObjCMethodDecl::param_const_iterator oi = oldMethod->param_begin(),
oe = oldMethod->param_end();
for (ObjCMethodDecl::param_iterator
ni = newMethod->param_begin(), ne = newMethod->param_end();
ni != ne && oi != oe; ++ni, ++oi)
mergeParamDeclAttributes(*ni, *oi, *this);
CheckObjCMethodOverride(newMethod, oldMethod);
}
static void diagnoseVarDeclTypeMismatch(Sema &S, VarDecl *New, VarDecl* Old) {
assert(!S.Context.hasSameType(New->getType(), Old->getType()));
S.Diag(New->getLocation(), New->isThisDeclarationADefinition()
? diag::err_redefinition_different_type
: diag::err_redeclaration_different_type)
<< New->getDeclName() << New->getType() << Old->getType();
diag::kind PrevDiag;
SourceLocation OldLocation;
std::tie(PrevDiag, OldLocation)
= getNoteDiagForInvalidRedeclaration(Old, New);
S.Diag(OldLocation, PrevDiag);
New->setInvalidDecl();
}
/// MergeVarDeclTypes - We parsed a variable 'New' which has the same name and
/// scope as a previous declaration 'Old'. Figure out how to merge their types,
/// emitting diagnostics as appropriate.
///
/// Declarations using the auto type specifier (C++ [decl.spec.auto]) call back
/// to here in AddInitializerToDecl. We can't check them before the initializer
/// is attached.
void Sema::MergeVarDeclTypes(VarDecl *New, VarDecl *Old,
bool MergeTypeWithOld) {
if (New->isInvalidDecl() || Old->isInvalidDecl())
return;
QualType MergedT;
if (getLangOpts().CPlusPlus) {
if (New->getType()->isUndeducedType()) {
// We don't know what the new type is until the initializer is attached.
return;
} else if (Context.hasSameType(New->getType(), Old->getType())) {
// These could still be something that needs exception specs checked.
return MergeVarDeclExceptionSpecs(New, Old);
}
// C++ [basic.link]p10:
// [...] the types specified by all declarations referring to a given
// object or function shall be identical, except that declarations for an
// array object can specify array types that differ by the presence or
// absence of a major array bound (8.3.4).
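// For example (illustrative):
//   extern int a[];     // no major array bound
//   extern int a[10];   // OK; the merged type is int[10]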
else if (Old->getType()->isArrayType() && New->getType()->isArrayType()) {
const ArrayType *OldArray = Context.getAsArrayType(Old->getType());
const ArrayType *NewArray = Context.getAsArrayType(New->getType());
// We are merging a variable declaration New into Old. If it has an array
// bound, and that bound differs from Old's bound, we should diagnose the
// mismatch.
if (!NewArray->isIncompleteArrayType() && !NewArray->isDependentType()) {
for (VarDecl *PrevVD = Old->getMostRecentDecl(); PrevVD;
PrevVD = PrevVD->getPreviousDecl()) {
QualType PrevVDTy = PrevVD->getType();
if (PrevVDTy->isIncompleteArrayType() || PrevVDTy->isDependentType())
continue;
if (!Context.hasSameType(New->getType(), PrevVDTy))
return diagnoseVarDeclTypeMismatch(*this, New, PrevVD);
}
}
if (OldArray->isIncompleteArrayType() && NewArray->isArrayType()) {
if (Context.hasSameType(OldArray->getElementType(),
NewArray->getElementType()))
MergedT = New->getType();
}
// FIXME: Check visibility. New is hidden but has a complete type. If New
// has no array bound, it should not inherit one from Old, if Old is not
// visible.
else if (OldArray->isArrayType() && NewArray->isIncompleteArrayType()) {
if (Context.hasSameType(OldArray->getElementType(),
NewArray->getElementType()))
MergedT = Old->getType();
}
}
else if (New->getType()->isObjCObjectPointerType() &&
Old->getType()->isObjCObjectPointerType()) {
MergedT = Context.mergeObjCGCQualifiers(New->getType(),
Old->getType());
}
} else {
// C 6.2.7p2:
// All declarations that refer to the same object or function shall have
// compatible type.
MergedT = Context.mergeTypes(New->getType(), Old->getType());
}
if (MergedT.isNull()) {
// It's OK if we couldn't merge types if either type is dependent, for a
// block-scope variable. In other cases (static data members of class
// templates, variable templates, ...), we require the types to be
// equivalent.
// FIXME: The C++ standard doesn't say anything about this.
if ((New->getType()->isDependentType() ||
Old->getType()->isDependentType()) && New->isLocalVarDecl()) {
// If the old type was dependent, we can't merge with it, so the new type
// becomes dependent for now. We'll reproduce the original type when we
// instantiate the TypeSourceInfo for the variable.
if (!New->getType()->isDependentType() && MergeTypeWithOld)
New->setType(Context.DependentTy);
return;
}
return diagnoseVarDeclTypeMismatch(*this, New, Old);
}
// Don't actually update the type on the new declaration if the old
// declaration was an extern declaration in a different scope.
if (MergeTypeWithOld)
New->setType(MergedT);
}
static bool mergeTypeWithPrevious(Sema &S, VarDecl *NewVD, VarDecl *OldVD,
LookupResult &Previous) {
// C11 6.2.7p4:
// For an identifier with internal or external linkage declared
// in a scope in which a prior declaration of that identifier is
// visible, if the prior declaration specifies internal or
// external linkage, the type of the identifier at the later
// declaration becomes the composite type.
//
// If the variable isn't visible, we do not merge with its type.
if (Previous.isShadowed())
return false;
if (S.getLangOpts().CPlusPlus) {
// C++11 [dcl.array]p3:
// If there is a preceding declaration of the entity in the same
// scope in which the bound was specified, an omitted array bound
// is taken to be the same as in that earlier declaration.
return NewVD->isPreviousDeclInSameBlockScope() ||
(!OldVD->getLexicalDeclContext()->isFunctionOrMethod() &&
!NewVD->getLexicalDeclContext()->isFunctionOrMethod());
} else {
// If the old declaration was function-local, don't merge with its
// type unless we're in the same function.
return !OldVD->getLexicalDeclContext()->isFunctionOrMethod() ||
OldVD->getLexicalDeclContext() == NewVD->getLexicalDeclContext();
}
}
/// MergeVarDecl - We just parsed a variable 'New' which has the same name
/// and scope as a previous declaration 'Old'. Figure out how to resolve this
/// situation, merging decls or emitting diagnostics as appropriate.
///
/// Tentative definition rules (C99 6.9.2p2) are checked by
/// FinalizeDeclaratorGroup. Unfortunately, we can't analyze tentative
/// definitions here, since the initializer hasn't been attached.
///
void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
// If the new decl is already invalid, don't do any other checking.
if (New->isInvalidDecl())
return;
if (!shouldLinkPossiblyHiddenDecl(Previous, New))
return;
VarTemplateDecl *NewTemplate = New->getDescribedVarTemplate();
// Verify the old decl was also a variable or variable template.
VarDecl *Old = nullptr;
VarTemplateDecl *OldTemplate = nullptr;
if (Previous.isSingleResult()) {
if (NewTemplate) {
OldTemplate = dyn_cast<VarTemplateDecl>(Previous.getFoundDecl());
Old = OldTemplate ? OldTemplate->getTemplatedDecl() : nullptr;
if (auto *Shadow =
dyn_cast<UsingShadowDecl>(Previous.getRepresentativeDecl()))
if (checkUsingShadowRedecl<VarTemplateDecl>(*this, Shadow, NewTemplate))
return New->setInvalidDecl();
} else {
Old = dyn_cast<VarDecl>(Previous.getFoundDecl());
if (auto *Shadow =
dyn_cast<UsingShadowDecl>(Previous.getRepresentativeDecl()))
if (checkUsingShadowRedecl<VarDecl>(*this, Shadow, New))
return New->setInvalidDecl();
}
}
if (!Old) {
Diag(New->getLocation(), diag::err_redefinition_different_kind)
<< New->getDeclName();
notePreviousDefinition(Previous.getRepresentativeDecl(),
New->getLocation());
return New->setInvalidDecl();
}
// If the old declaration was found in an inline namespace and the new
// declaration was qualified, update the DeclContext to match.
adjustDeclContextForDeclaratorDecl(New, Old);
// Ensure the template parameters are compatible.
if (NewTemplate &&
!TemplateParameterListsAreEqual(NewTemplate->getTemplateParameters(),
OldTemplate->getTemplateParameters(),
/*Complain=*/true, TPL_TemplateMatch))
return New->setInvalidDecl();
// C++ [class.mem]p1:
// A member shall not be declared twice in the member-specification [...]
//
// Here, we need only consider static data members.
if (Old->isStaticDataMember() && !New->isOutOfLine()) {
Diag(New->getLocation(), diag::err_duplicate_member)
<< New->getIdentifier();
Diag(Old->getLocation(), diag::note_previous_declaration);
New->setInvalidDecl();
}
mergeDeclAttributes(New, Old);
// Warn if an already-declared variable is made a weak_import in a subsequent
// declaration
if (New->hasAttr<WeakImportAttr>() &&
Old->getStorageClass() == SC_None &&
!Old->hasAttr<WeakImportAttr>()) {
Diag(New->getLocation(), diag::warn_weak_import) << New->getDeclName();
Diag(Old->getLocation(), diag::note_previous_declaration);
// Remove weak_import attribute on new declaration.
New->dropAttr<WeakImportAttr>();
}
if (const auto *ILA = New->getAttr<InternalLinkageAttr>())
if (!Old->hasAttr<InternalLinkageAttr>()) {
Diag(New->getLocation(), diag::err_attribute_missing_on_first_decl)
<< ILA;
Diag(Old->getLocation(), diag::note_previous_declaration);
New->dropAttr<InternalLinkageAttr>();
}
// Merge the types.
VarDecl *MostRecent = Old->getMostRecentDecl();
if (MostRecent != Old) {
MergeVarDeclTypes(New, MostRecent,
mergeTypeWithPrevious(*this, New, MostRecent, Previous));
if (New->isInvalidDecl())
return;
}
MergeVarDeclTypes(New, Old, mergeTypeWithPrevious(*this, New, Old, Previous));
if (New->isInvalidDecl())
return;
diag::kind PrevDiag;
SourceLocation OldLocation;
std::tie(PrevDiag, OldLocation) =
getNoteDiagForInvalidRedeclaration(Old, New);
// [dcl.stc]p8: Check if we have a non-static decl followed by a static.
if (New->getStorageClass() == SC_Static &&
!New->isStaticDataMember() &&
Old->hasExternalFormalLinkage()) {
if (getLangOpts().MicrosoftExt) {
Diag(New->getLocation(), diag::ext_static_non_static)
<< New->getDeclName();
Diag(OldLocation, PrevDiag);
} else {
Diag(New->getLocation(), diag::err_static_non_static)
<< New->getDeclName();
Diag(OldLocation, PrevDiag);
return New->setInvalidDecl();
}
}
// C99 6.2.2p4:
// For an identifier declared with the storage-class specifier
// extern in a scope in which a prior declaration of that
// identifier is visible,23) if the prior declaration specifies
// internal or external linkage, the linkage of the identifier at
// the later declaration is the same as the linkage specified at
// the prior declaration. If no prior declaration is visible, or
// if the prior declaration specifies no linkage, then the
// identifier has external linkage.
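// For example (illustrative, C):
//   static int x;   // internal linkage
//   extern int x;   // OK: refers to the same x and keeps internal linkage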
if (New->hasExternalStorage() && Old->hasLinkage())
/* Okay */;
else if (New->getCanonicalDecl()->getStorageClass() != SC_Static &&
!New->isStaticDataMember() &&
Old->getCanonicalDecl()->getStorageClass() == SC_Static) {
Diag(New->getLocation(), diag::err_non_static_static) << New->getDeclName();
Diag(OldLocation, PrevDiag);
return New->setInvalidDecl();
}
// Check if extern is followed by non-extern and vice-versa.
if (New->hasExternalStorage() &&
!Old->hasLinkage() && Old->isLocalVarDeclOrParm()) {
Diag(New->getLocation(), diag::err_extern_non_extern) << New->getDeclName();
Diag(OldLocation, PrevDiag);
return New->setInvalidDecl();
}
if (Old->hasLinkage() && New->isLocalVarDeclOrParm() &&
!New->hasExternalStorage()) {
Diag(New->getLocation(), diag::err_non_extern_extern) << New->getDeclName();
Diag(OldLocation, PrevDiag);
return New->setInvalidDecl();
}
if (CheckRedeclarationInModule(New, Old))
return;
// Variables with external linkage are analyzed in FinalizeDeclaratorGroup.
// FIXME: The test for external storage here seems wrong? We still
// need to check for mismatches.
if (!New->hasExternalStorage() && !New->isFileVarDecl() &&
// Don't complain about out-of-line definitions of static members.
!(Old->getLexicalDeclContext()->isRecord() &&
!New->getLexicalDeclContext()->isRecord())) {
Diag(New->getLocation(), diag::err_redefinition) << New->getDeclName();
Diag(OldLocation, PrevDiag);
return New->setInvalidDecl();
}
if (New->isInline() && !Old->getMostRecentDecl()->isInline()) {
if (VarDecl *Def = Old->getDefinition()) {
// C++1z [dcl.fcn.spec]p4:
// If the definition of a variable appears in a translation unit before
// its first declaration as inline, the program is ill-formed.
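// For example (illustrative, C++17):
//   int v = 0;             // definition
//   extern inline int v;   // ill-formed: first 'inline' declaration follows
//                          // the definition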
Diag(New->getLocation(), diag::err_inline_decl_follows_def) << New;
Diag(Def->getLocation(), diag::note_previous_definition);
}
}
// If this redeclaration makes the variable inline, we may need to add it to
// UndefinedButUsed.
if (!Old->isInline() && New->isInline() && Old->isUsed(false) &&
!Old->getDefinition() && !New->isThisDeclarationADefinition())
UndefinedButUsed.insert(std::make_pair(Old->getCanonicalDecl(),
SourceLocation()));
if (New->getTLSKind() != Old->getTLSKind()) {
if (!Old->getTLSKind()) {
Diag(New->getLocation(), diag::err_thread_non_thread) << New->getDeclName();
Diag(OldLocation, PrevDiag);
} else if (!New->getTLSKind()) {
Diag(New->getLocation(), diag::err_non_thread_thread) << New->getDeclName();
Diag(OldLocation, PrevDiag);
} else {
// Do not allow redeclaration to change the variable between requiring
// static and dynamic initialization.
// FIXME: GCC allows this, but uses the TLS keyword on the first
// declaration to determine the kind. Do we need to be compatible here?
Diag(New->getLocation(), diag::err_thread_thread_different_kind)
<< New->getDeclName() << (New->getTLSKind() == VarDecl::TLS_Dynamic);
Diag(OldLocation, PrevDiag);
}
}
// C++ doesn't have tentative definitions, so go right ahead and check here.
if (getLangOpts().CPlusPlus) {
if (Old->isStaticDataMember() && Old->getCanonicalDecl()->isInline() &&
Old->getCanonicalDecl()->isConstexpr()) {
// This definition won't be a definition any more once it's been merged.
Diag(New->getLocation(),
diag::warn_deprecated_redundant_constexpr_static_def);
} else if (New->isThisDeclarationADefinition() == VarDecl::Definition) {
VarDecl *Def = Old->getDefinition();
if (Def && checkVarDeclRedefinition(Def, New))
return;
}
}
if (haveIncompatibleLanguageLinkages(Old, New)) {
Diag(New->getLocation(), diag::err_different_language_linkage) << New;
Diag(OldLocation, PrevDiag);
New->setInvalidDecl();
return;
}
// Merge "used" flag.
if (Old->getMostRecentDecl()->isUsed(false))
New->setIsUsed();
// Keep a chain of previous declarations.
New->setPreviousDecl(Old);
if (NewTemplate)
NewTemplate->setPreviousDecl(OldTemplate);
// Inherit access appropriately.
New->setAccess(Old->getAccess());
if (NewTemplate)
NewTemplate->setAccess(New->getAccess());
if (Old->isInline())
New->setImplicitlyInline();
}
void Sema::notePreviousDefinition(const NamedDecl *Old, SourceLocation New) {
SourceManager &SrcMgr = getSourceManager();
auto FNewDecLoc = SrcMgr.getDecomposedLoc(New);
auto FOldDecLoc = SrcMgr.getDecomposedLoc(Old->getLocation());
auto *FNew = SrcMgr.getFileEntryForID(FNewDecLoc.first);
auto *FOld = SrcMgr.getFileEntryForID(FOldDecLoc.first);
auto &HSI = PP.getHeaderSearchInfo();
StringRef HdrFilename =
SrcMgr.getFilename(SrcMgr.getSpellingLoc(Old->getLocation()));
auto noteFromModuleOrInclude = [&](Module *Mod,
SourceLocation IncLoc) -> bool {
// Redefinition errors with modules are common with non-modular mapped
// headers, for example: a non-modular header H in module A that also gets
// included directly in a TU. Pointing twice to the same header/definition
// is confusing; try to give better diagnostics when modules are enabled.
if (IncLoc.isValid()) {
if (Mod) {
Diag(IncLoc, diag::note_redefinition_modules_same_file)
<< HdrFilename.str() << Mod->getFullModuleName();
if (!Mod->DefinitionLoc.isInvalid())
Diag(Mod->DefinitionLoc, diag::note_defined_here)
<< Mod->getFullModuleName();
} else {
Diag(IncLoc, diag::note_redefinition_include_same_file)
<< HdrFilename.str();
}
return true;
}
return false;
};
// Is it the same file and same offset? Provide more information on why
// this leads to a redefinition error.
if (FNew == FOld && FNewDecLoc.second == FOldDecLoc.second) {
SourceLocation OldIncLoc = SrcMgr.getIncludeLoc(FOldDecLoc.first);
SourceLocation NewIncLoc = SrcMgr.getIncludeLoc(FNewDecLoc.first);
bool EmittedDiag =
noteFromModuleOrInclude(Old->getOwningModule(), OldIncLoc);
EmittedDiag |= noteFromModuleOrInclude(getCurrentModule(), NewIncLoc);
// If the header has no guards, emit a note suggesting one.
if (FOld && !HSI.isFileMultipleIncludeGuarded(FOld))
Diag(Old->getLocation(), diag::note_use_ifdef_guards);
if (EmittedDiag)
return;
}
// Redefinition coming from different files or couldn't do better above.
if (Old->getLocation().isValid())
Diag(Old->getLocation(), diag::note_previous_definition);
}
/// We've just determined that \p Old and \p New both appear to be definitions
/// of the same variable. Either diagnose or fix the problem.
bool Sema::checkVarDeclRedefinition(VarDecl *Old, VarDecl *New) {
if (!hasVisibleDefinition(Old) &&
(New->getFormalLinkage() == InternalLinkage ||
New->isInline() ||
isa<VarTemplateSpecializationDecl>(New) ||
New->getDescribedVarTemplate() ||
New->getNumTemplateParameterLists() ||
New->getDeclContext()->isDependentContext())) {
// The previous definition is hidden, and multiple definitions are
// permitted (in separate TUs). Demote this to a declaration.
New->demoteThisDefinitionToDeclaration();
// Make the canonical definition visible.
if (auto *OldTD = Old->getDescribedVarTemplate())
makeMergedDefinitionVisible(OldTD);
makeMergedDefinitionVisible(Old);
return false;
} else {
Diag(New->getLocation(), diag::err_redefinition) << New;
notePreviousDefinition(Old, New->getLocation());
New->setInvalidDecl();
return true;
}
}
/// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with
/// no declarator (e.g. "struct foo;") is parsed.
Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
DeclSpec &DS,
const ParsedAttributesView &DeclAttrs,
RecordDecl *&AnonRecord) {
return ParsedFreeStandingDeclSpec(
S, AS, DS, DeclAttrs, MultiTemplateParamsArg(), false, AnonRecord);
}
// The MS ABI changed between VS2013 and VS2015 with regard to numbers used to
// disambiguate entities defined in different scopes.
// While the VS2015 ABI fixes potential miscompiles, it also breaks
// compatibility.
// We will pick our mangling number depending on which version of MSVC is being
// targeted.
static unsigned getMSManglingNumber(const LangOptions &LO, Scope *S) {
return LO.isCompatibleWithMSVC(LangOptions::MSVC2015)
? S->getMSCurManglingNumber()
: S->getMSLastManglingNumber();
}
void Sema::handleTagNumbering(const TagDecl *Tag, Scope *TagScope) {
if (!Context.getLangOpts().CPlusPlus)
return;
if (isa<CXXRecordDecl>(Tag->getParent())) {
// If this tag is the direct child of a class, number it if
// it is anonymous.
if (!Tag->getName().empty() || Tag->getTypedefNameForAnonDecl())
return;
MangleNumberingContext &MCtx =
Context.getManglingNumberContext(Tag->getParent());
Context.setManglingNumber(
Tag, MCtx.getManglingNumber(
Tag, getMSManglingNumber(getLangOpts(), TagScope)));
return;
}
// If this tag isn't a direct child of a class, number it if it is local.
MangleNumberingContext *MCtx;
Decl *ManglingContextDecl;
std::tie(MCtx, ManglingContextDecl) =
getCurrentMangleNumberContext(Tag->getDeclContext());
if (MCtx) {
Context.setManglingNumber(
Tag, MCtx->getManglingNumber(
Tag, getMSManglingNumber(getLangOpts(), TagScope)));
}
}
namespace {
struct NonCLikeKind {
enum {
None,
BaseClass,
DefaultMemberInit,
Lambda,
Friend,
OtherMember,
Invalid,
} Kind = None;
SourceRange Range;
explicit operator bool() { return Kind != None; }
};
}
/// Determine whether a class is C-like, according to the rules of C++
/// [dcl.typedef] for anonymous classes with typedef names for linkage.
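/// For example (illustrative), the following is non-C-like because of the
/// default member initializer, so the typedef name cannot be used for
/// linkage purposes:
/// @code
///   typedef struct { int x = 0; } T;
/// @endcode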
static NonCLikeKind getNonCLikeKindForAnonymousStruct(const CXXRecordDecl *RD) {
if (RD->isInvalidDecl())
return {NonCLikeKind::Invalid, {}};
// C++ [dcl.typedef]p9: [P1766R1]
// An unnamed class with a typedef name for linkage purposes shall not
//
// -- have any base classes
if (RD->getNumBases())
return {NonCLikeKind::BaseClass,
SourceRange(RD->bases_begin()->getBeginLoc(),
RD->bases_end()[-1].getEndLoc())};
bool Invalid = false;
for (Decl *D : RD->decls()) {
// Don't complain about things we already diagnosed.
if (D->isInvalidDecl()) {
Invalid = true;
continue;
}
// -- have any [...] default member initializers
if (auto *FD = dyn_cast<FieldDecl>(D)) {
if (FD->hasInClassInitializer()) {
auto *Init = FD->getInClassInitializer();
return {NonCLikeKind::DefaultMemberInit,
Init ? Init->getSourceRange() : D->getSourceRange()};
}
continue;
}
// FIXME: We don't allow friend declarations. This violates the wording of
// P1766, but not the intent.
if (isa<FriendDecl>(D))
return {NonCLikeKind::Friend, D->getSourceRange()};
// -- declare any members other than non-static data members, member
// enumerations, or member classes,
if (isa<StaticAssertDecl>(D) || isa<IndirectFieldDecl>(D) ||
isa<EnumDecl>(D))
continue;
auto *MemberRD = dyn_cast<CXXRecordDecl>(D);
if (!MemberRD) {
if (D->isImplicit())
continue;
return {NonCLikeKind::OtherMember, D->getSourceRange()};
}
// -- contain a lambda-expression,
if (MemberRD->isLambda())
return {NonCLikeKind::Lambda, MemberRD->getSourceRange()};
// and all member classes shall also satisfy these requirements
// (recursively).
if (MemberRD->isThisDeclarationADefinition()) {
if (auto Kind = getNonCLikeKindForAnonymousStruct(MemberRD))
return Kind;
}
}
return {Invalid ? NonCLikeKind::Invalid : NonCLikeKind::None, {}};
}
void Sema::setTagNameForLinkagePurposes(TagDecl *TagFromDeclSpec,
TypedefNameDecl *NewTD) {
if (TagFromDeclSpec->isInvalidDecl())
return;
// Do nothing if the tag already has a name for linkage purposes.
if (TagFromDeclSpec->hasNameForLinkage())
return;
// A well-formed anonymous tag must always be a TUK_Definition.
assert(TagFromDeclSpec->isThisDeclarationADefinition());
// The type must match the tag exactly; no qualifiers allowed.
if (!Context.hasSameType(NewTD->getUnderlyingType(),
Context.getTagDeclType(TagFromDeclSpec))) {
if (getLangOpts().CPlusPlus)
Context.addTypedefNameForUnnamedTagDecl(TagFromDeclSpec, NewTD);
return;
}
// C++ [dcl.typedef]p9: [P1766R1, applied as DR]
// An unnamed class with a typedef name for linkage purposes shall [be
// C-like].
//
// FIXME: Also diagnose if we've already computed the linkage. That ideally
// shouldn't happen, but there are constructs that the language rule doesn't
// disallow for which we can't reasonably avoid computing linkage early.
const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(TagFromDeclSpec);
NonCLikeKind NonCLike = RD ? getNonCLikeKindForAnonymousStruct(RD)
: NonCLikeKind();
bool ChangesLinkage = TagFromDeclSpec->hasLinkageBeenComputed();
if (NonCLike || ChangesLinkage) {
if (NonCLike.Kind == NonCLikeKind::Invalid)
return;
unsigned DiagID = diag::ext_non_c_like_anon_struct_in_typedef;
if (ChangesLinkage) {
// If the linkage changes, we can't accept this as an extension.
if (NonCLike.Kind == NonCLikeKind::None)
DiagID = diag::err_typedef_changes_linkage;
else
DiagID = diag::err_non_c_like_anon_struct_in_typedef;
}
SourceLocation FixitLoc =
getLocForEndOfToken(TagFromDeclSpec->getInnerLocStart());
llvm::SmallString<40> TextToInsert;
TextToInsert += ' ';
TextToInsert += NewTD->getIdentifier()->getName();
Diag(FixitLoc, DiagID)
<< isa<TypeAliasDecl>(NewTD)
<< FixItHint::CreateInsertion(FixitLoc, TextToInsert);
if (NonCLike.Kind != NonCLikeKind::None) {
Diag(NonCLike.Range.getBegin(), diag::note_non_c_like_anon_struct)
<< NonCLike.Kind - 1 << NonCLike.Range;
}
Diag(NewTD->getLocation(), diag::note_typedef_for_linkage_here)
<< NewTD << isa<TypeAliasDecl>(NewTD);
if (ChangesLinkage)
return;
}
// Otherwise, set this as the anon-decl typedef for the tag.
TagFromDeclSpec->setTypedefNameForAnonDecl(NewTD);
}
static unsigned GetDiagnosticTypeSpecifierID(DeclSpec::TST T) {
switch (T) {
case DeclSpec::TST_class:
return 0;
case DeclSpec::TST_struct:
return 1;
case DeclSpec::TST_interface:
return 2;
case DeclSpec::TST_union:
return 3;
case DeclSpec::TST_enum:
return 4;
default:
llvm_unreachable("unexpected type specifier");
}
}
/// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with
/// no declarator (e.g. "struct foo;") is parsed. It also accepts template
/// parameters to cope with template friend declarations.
Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
DeclSpec &DS,
const ParsedAttributesView &DeclAttrs,
MultiTemplateParamsArg TemplateParams,
bool IsExplicitInstantiation,
RecordDecl *&AnonRecord) {
Decl *TagD = nullptr;
TagDecl *Tag = nullptr;
if (DS.getTypeSpecType() == DeclSpec::TST_class ||
DS.getTypeSpecType() == DeclSpec::TST_struct ||
DS.getTypeSpecType() == DeclSpec::TST_interface ||
DS.getTypeSpecType() == DeclSpec::TST_union ||
DS.getTypeSpecType() == DeclSpec::TST_enum) {
TagD = DS.getRepAsDecl();
if (!TagD) // We probably had an error
return nullptr;
// Note that the above type specs guarantee that the
// type rep is a Decl, whereas in many of the others
// it's a Type.
if (isa<TagDecl>(TagD))
Tag = cast<TagDecl>(TagD);
else if (ClassTemplateDecl *CTD = dyn_cast<ClassTemplateDecl>(TagD))
Tag = CTD->getTemplatedDecl();
}
if (Tag) {
handleTagNumbering(Tag, S);
Tag->setFreeStanding();
if (Tag->isInvalidDecl())
return Tag;
}
if (unsigned TypeQuals = DS.getTypeQualifiers()) {
// Enforce C99 6.7.3p2: "Types other than pointer types derived from object
// or incomplete types shall not be restrict-qualified."
if (TypeQuals & DeclSpec::TQ_restrict)
Diag(DS.getRestrictSpecLoc(),
diag::err_typecheck_invalid_restrict_not_pointer_noarg)
<< DS.getSourceRange();
}
if (DS.isInlineSpecified())
Diag(DS.getInlineSpecLoc(), diag::err_inline_non_function)
<< getLangOpts().CPlusPlus17;
if (DS.hasConstexprSpecifier()) {
// C++0x [dcl.constexpr]p1: constexpr can only be applied to declarations
// and definitions of functions and variables.
// C++2a [dcl.constexpr]p1: The consteval specifier shall be applied only to
// the declaration of a function or function template
if (Tag)
Diag(DS.getConstexprSpecLoc(), diag::err_constexpr_tag)
<< GetDiagnosticTypeSpecifierID(DS.getTypeSpecType())
<< static_cast<int>(DS.getConstexprSpecifier());
else
Diag(DS.getConstexprSpecLoc(), diag::err_constexpr_wrong_decl_kind)
<< static_cast<int>(DS.getConstexprSpecifier());
// Don't emit warnings after this error.
return TagD;
}
DiagnoseFunctionSpecifiers(DS);
if (DS.isFriendSpecified()) {
// If we're dealing with a decl but not a TagDecl, assume that
// whatever routines created it handled the friendship aspect.
if (TagD && !Tag)
return nullptr;
return ActOnFriendTypeDecl(S, DS, TemplateParams);
}
const CXXScopeSpec &SS = DS.getTypeSpecScope();
bool IsExplicitSpecialization =
!TemplateParams.empty() && TemplateParams.back()->size() == 0;
if (Tag && SS.isNotEmpty() && !Tag->isCompleteDefinition() &&
!IsExplicitInstantiation && !IsExplicitSpecialization &&
!isa<ClassTemplatePartialSpecializationDecl>(Tag)) {
// Per C++ [dcl.type.elab]p1, a class declaration cannot have a
// nested-name-specifier unless it is an explicit instantiation
// or an explicit specialization.
//
// FIXME: We allow class template partial specializations here too, per the
// obvious intent of DR1819.
//
// Per C++ [dcl.enum]p1, an opaque-enum-declaration can't either.
Diag(SS.getBeginLoc(), diag::err_standalone_class_nested_name_specifier)
<< GetDiagnosticTypeSpecifierID(DS.getTypeSpecType()) << SS.getRange();
return nullptr;
}
// Track whether this decl-specifier declares anything.
bool DeclaresAnything = true;
// Handle anonymous struct definitions.
if (RecordDecl *Record = dyn_cast_or_null<RecordDecl>(Tag)) {
if (!Record->getDeclName() && Record->isCompleteDefinition() &&
DS.getStorageClassSpec() != DeclSpec::SCS_typedef) {
if (getLangOpts().CPlusPlus ||
Record->getDeclContext()->isRecord()) {
// If CurContext is a DeclContext that can contain statements,
// RecursiveASTVisitor won't visit the decls that
// BuildAnonymousStructOrUnion() will put into CurContext.
// Also store them here so that they can be part of the
// DeclStmt that gets created in this case.
// FIXME: Also return the IndirectFieldDecls created by
// BuildAnonymousStructOrUnion, for the same reason?
if (CurContext->isFunctionOrMethod())
AnonRecord = Record;
return BuildAnonymousStructOrUnion(S, DS, AS, Record,
Context.getPrintingPolicy());
}
DeclaresAnything = false;
}
}
// C11 6.7.2.1p2:
// A struct-declaration that does not declare an anonymous structure or
// anonymous union shall contain a struct-declarator-list.
//
// This rule also existed in C89 and C99; the grammar for struct-declaration
// did not permit a struct-declaration without a struct-declarator-list.
if (!getLangOpts().CPlusPlus && CurContext->isRecord() &&
DS.getStorageClassSpec() == DeclSpec::SCS_unspecified) {
// Check for Microsoft C extension: anonymous struct/union member.
// Handle 2 kinds of anonymous struct/union:
// struct STRUCT;
// union UNION;
// and
// STRUCT_TYPE; <- where STRUCT_TYPE is a typedef struct.
// UNION_TYPE; <- where UNION_TYPE is a typedef union.
if ((Tag && Tag->getDeclName()) ||
DS.getTypeSpecType() == DeclSpec::TST_typename) {
RecordDecl *Record = nullptr;
if (Tag)
Record = dyn_cast<RecordDecl>(Tag);
else if (const RecordType *RT =
DS.getRepAsType().get()->getAsStructureType())
Record = RT->getDecl();
else if (const RecordType *UT = DS.getRepAsType().get()->getAsUnionType())
Record = UT->getDecl();
if (Record && getLangOpts().MicrosoftExt) {
Diag(DS.getBeginLoc(), diag::ext_ms_anonymous_record)
<< Record->isUnion() << DS.getSourceRange();
return BuildMicrosoftCAnonymousStruct(S, DS, Record);
}
DeclaresAnything = false;
}
}
// Skip all the checks below if we have a type error.
if (DS.getTypeSpecType() == DeclSpec::TST_error ||
(TagD && TagD->isInvalidDecl()))
return TagD;
if (getLangOpts().CPlusPlus &&
DS.getStorageClassSpec() != DeclSpec::SCS_typedef)
if (EnumDecl *Enum = dyn_cast_or_null<EnumDecl>(Tag))
if (Enum->enumerator_begin() == Enum->enumerator_end() &&
!Enum->getIdentifier() && !Enum->isInvalidDecl())
DeclaresAnything = false;
if (!DS.isMissingDeclaratorOk()) {
// Customize diagnostic for a typedef missing a name.
if (DS.getStorageClassSpec() == DeclSpec::SCS_typedef)
Diag(DS.getBeginLoc(), diag::ext_typedef_without_a_name)
<< DS.getSourceRange();
else
DeclaresAnything = false;
}
if (DS.isModulePrivateSpecified() &&
Tag && Tag->getDeclContext()->isFunctionOrMethod())
Diag(DS.getModulePrivateSpecLoc(), diag::err_module_private_local_class)
<< Tag->getTagKind()
<< FixItHint::CreateRemoval(DS.getModulePrivateSpecLoc());
ActOnDocumentableDecl(TagD);
// C 6.7/2:
// A declaration [...] shall declare at least a declarator [...], a tag,
// or the members of an enumeration.
// C++ [dcl.dcl]p3:
// [If there are no declarators], and except for the declaration of an
// unnamed bit-field, the decl-specifier-seq shall introduce one or more
// names into the program, or shall redeclare a name introduced by a
// previous declaration.
if (!DeclaresAnything) {
// In C, we allow this as a (popular) extension / bug. Don't bother
// producing further diagnostics for redundant qualifiers after this.
Diag(DS.getBeginLoc(), (IsExplicitInstantiation || !TemplateParams.empty())
? diag::err_no_declarators
: diag::ext_no_declarators)
<< DS.getSourceRange();
return TagD;
}
// C++ [dcl.stc]p1:
// If a storage-class-specifier appears in a decl-specifier-seq, [...] the
// init-declarator-list of the declaration shall not be empty.
// C++ [dcl.fct.spec]p1:
// If a cv-qualifier appears in a decl-specifier-seq, the
// init-declarator-list of the declaration shall not be empty.
//
// Spurious qualifiers here appear to be valid in C.
unsigned DiagID = diag::warn_standalone_specifier;
if (getLangOpts().CPlusPlus)
DiagID = diag::ext_standalone_specifier;
// Note that a linkage-specification sets a storage class, but
// 'extern "C" struct foo;' is actually valid and not theoretically
// useless.
if (DeclSpec::SCS SCS = DS.getStorageClassSpec()) {
if (SCS == DeclSpec::SCS_mutable)
// Since mutable is not a viable storage class specifier in C, there is
// no reason to treat it as an extension. Instead, diagnose as an error.
Diag(DS.getStorageClassSpecLoc(), diag::err_mutable_nonmember);
else if (!DS.isExternInLinkageSpec() && SCS != DeclSpec::SCS_typedef)
Diag(DS.getStorageClassSpecLoc(), DiagID)
<< DeclSpec::getSpecifierName(SCS);
}
if (DeclSpec::TSCS TSCS = DS.getThreadStorageClassSpec())
Diag(DS.getThreadStorageClassSpecLoc(), DiagID)
<< DeclSpec::getSpecifierName(TSCS);
if (DS.getTypeQualifiers()) {
if (DS.getTypeQualifiers() & DeclSpec::TQ_const)
Diag(DS.getConstSpecLoc(), DiagID) << "const";
if (DS.getTypeQualifiers() & DeclSpec::TQ_volatile)
Diag(DS.getConstSpecLoc(), DiagID) << "volatile";
// Restrict is covered above.
if (DS.getTypeQualifiers() & DeclSpec::TQ_atomic)
Diag(DS.getAtomicSpecLoc(), DiagID) << "_Atomic";
if (DS.getTypeQualifiers() & DeclSpec::TQ_unaligned)
Diag(DS.getUnalignedSpecLoc(), DiagID) << "__unaligned";
}
// Warn about ignored type attributes, for example:
// __attribute__((aligned)) struct A;
// Attributes should be placed after tag to apply to type declaration.
if (!DS.getAttributes().empty() || !DeclAttrs.empty()) {
DeclSpec::TST TypeSpecType = DS.getTypeSpecType();
if (TypeSpecType == DeclSpec::TST_class ||
TypeSpecType == DeclSpec::TST_struct ||
TypeSpecType == DeclSpec::TST_interface ||
TypeSpecType == DeclSpec::TST_union ||
TypeSpecType == DeclSpec::TST_enum) {
for (const ParsedAttr &AL : DS.getAttributes())
Diag(AL.getLoc(), diag::warn_declspec_attribute_ignored)
<< AL << GetDiagnosticTypeSpecifierID(TypeSpecType);
for (const ParsedAttr &AL : DeclAttrs)
Diag(AL.getLoc(), diag::warn_declspec_attribute_ignored)
<< AL << GetDiagnosticTypeSpecifierID(TypeSpecType);
}
}
return TagD;
}
/// We are trying to inject an anonymous member into the given scope;
/// check if there's an existing declaration that can't be overloaded.
///
/// \return true if this is a forbidden redeclaration
static bool CheckAnonMemberRedeclaration(Sema &SemaRef,
Scope *S,
DeclContext *Owner,
DeclarationName Name,
SourceLocation NameLoc,
bool IsUnion) {
LookupResult R(SemaRef, Name, NameLoc, Sema::LookupMemberName,
Sema::ForVisibleRedeclaration);
if (!SemaRef.LookupName(R, S)) return false;
// Pick a representative declaration.
NamedDecl *PrevDecl = R.getRepresentativeDecl()->getUnderlyingDecl();
assert(PrevDecl && "Expected a non-null Decl");
if (!SemaRef.isDeclInScope(PrevDecl, Owner, S))
return false;
SemaRef.Diag(NameLoc, diag::err_anonymous_record_member_redecl)
<< IsUnion << Name;
SemaRef.Diag(PrevDecl->getLocation(), diag::note_previous_declaration);
return true;
}
/// InjectAnonymousStructOrUnionMembers - Inject the members of the
/// anonymous struct or union AnonRecord into the owning context Owner
/// and scope S. This routine will be invoked just after we realize
/// that an unnamed union or struct is actually an anonymous union or
/// struct, e.g.,
///
/// @code
/// union {
/// int i;
/// float f;
/// }; // InjectAnonymousStructOrUnionMembers called here to inject i and
/// // f into the surrounding scope.
/// @endcode
///
/// This routine is recursive, injecting the names of nested anonymous
/// structs/unions into the owning context and scope as well.
static bool
InjectAnonymousStructOrUnionMembers(Sema &SemaRef, Scope *S, DeclContext *Owner,
RecordDecl *AnonRecord, AccessSpecifier AS,
SmallVectorImpl<NamedDecl *> &Chaining) {
bool Invalid = false;
// Look at every FieldDecl and IndirectFieldDecl with a name.
for (auto *D : AnonRecord->decls()) {
if ((isa<FieldDecl>(D) || isa<IndirectFieldDecl>(D)) &&
cast<NamedDecl>(D)->getDeclName()) {
ValueDecl *VD = cast<ValueDecl>(D);
if (CheckAnonMemberRedeclaration(SemaRef, S, Owner, VD->getDeclName(),
VD->getLocation(),
AnonRecord->isUnion())) {
// C++ [class.union]p2:
// The names of the members of an anonymous union shall be
// distinct from the names of any other entity in the
// scope in which the anonymous union is declared.
Invalid = true;
} else {
// C++ [class.union]p2:
// For the purpose of name lookup, after the anonymous union
// definition, the members of the anonymous union are
// considered to have been defined in the scope in which the
// anonymous union is declared.
unsigned OldChainingSize = Chaining.size();
if (IndirectFieldDecl *IF = dyn_cast<IndirectFieldDecl>(VD))
Chaining.append(IF->chain_begin(), IF->chain_end());
else
Chaining.push_back(VD);
assert(Chaining.size() >= 2);
NamedDecl **NamedChain =
new (SemaRef.Context)NamedDecl*[Chaining.size()];
for (unsigned i = 0; i < Chaining.size(); i++)
NamedChain[i] = Chaining[i];
IndirectFieldDecl *IndirectField = IndirectFieldDecl::Create(
SemaRef.Context, Owner, VD->getLocation(), VD->getIdentifier(),
VD->getType(), {NamedChain, Chaining.size()});
for (const auto *Attr : VD->attrs())
IndirectField->addAttr(Attr->clone(SemaRef.Context));
IndirectField->setAccess(AS);
IndirectField->setImplicit();
SemaRef.PushOnScopeChains(IndirectField, S);
// That includes picking up the appropriate access specifier.
if (AS != AS_none) IndirectField->setAccess(AS);
Chaining.resize(OldChainingSize);
}
}
}
return Invalid;
}
/// StorageClassSpecToVarDeclStorageClass - Maps a DeclSpec::SCS to
/// a VarDecl::StorageClass. Any error reporting is up to the caller:
/// illegal input values are mapped to SC_None.
static StorageClass
StorageClassSpecToVarDeclStorageClass(const DeclSpec &DS) {
DeclSpec::SCS StorageClassSpec = DS.getStorageClassSpec();
assert(StorageClassSpec != DeclSpec::SCS_typedef &&
"Parser allowed 'typedef' as storage class VarDecl.");
switch (StorageClassSpec) {
case DeclSpec::SCS_unspecified: return SC_None;
case DeclSpec::SCS_extern:
if (DS.isExternInLinkageSpec())
return SC_None;
return SC_Extern;
case DeclSpec::SCS_static: return SC_Static;
case DeclSpec::SCS_auto: return SC_Auto;
case DeclSpec::SCS_register: return SC_Register;
case DeclSpec::SCS_private_extern: return SC_PrivateExtern;
// Illegal SCSs map to None: error reporting is up to the caller.
case DeclSpec::SCS_mutable: // Fall through.
case DeclSpec::SCS_typedef: return SC_None;
}
llvm_unreachable("unknown storage class specifier");
}
static SourceLocation findDefaultInitializer(const CXXRecordDecl *Record) {
assert(Record->hasInClassInitializer());
for (const auto *I : Record->decls()) {
const auto *FD = dyn_cast<FieldDecl>(I);
if (const auto *IFD = dyn_cast<IndirectFieldDecl>(I))
FD = IFD->getAnonField();
if (FD && FD->hasInClassInitializer())
return FD->getLocation();
}
llvm_unreachable("couldn't find in-class initializer");
}
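/// Diagnose a union that would end up with more than one variant member
/// carrying a default member initializer (C++11 [class.union]p8, DR1460).
/// For example (an illustrative sketch):
/// @code
/// union U {
///   int a = 0;
///   union { int b = 1; };  // error: initializing multiple members of union
/// };
/// @endcode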
static void checkDuplicateDefaultInit(Sema &S, CXXRecordDecl *Parent,
SourceLocation DefaultInitLoc) {
if (!Parent->isUnion() || !Parent->hasInClassInitializer())
return;
S.Diag(DefaultInitLoc, diag::err_multiple_mem_union_initialization);
S.Diag(findDefaultInitializer(Parent), diag::note_previous_initializer) << 0;
}
static void checkDuplicateDefaultInit(Sema &S, CXXRecordDecl *Parent,
CXXRecordDecl *AnonUnion) {
if (!Parent->isUnion() || !Parent->hasInClassInitializer())
return;
checkDuplicateDefaultInit(S, Parent, findDefaultInitializer(AnonUnion));
}
/// BuildAnonymousStructOrUnion - Handle the declaration of an
/// anonymous structure or union. Anonymous unions are a C++ feature
/// (C++ [class.union]) and a C11 feature; anonymous structures
/// are a C11 feature and a GNU C++ extension.
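///
/// For example, at namespace scope in C++ (an illustrative sketch):
/// @code
/// static union { int i; float f; };  // OK
/// union { int j; };                  // error: anonymous unions at namespace
///                                    // scope must be declared 'static'
/// @endcode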
Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS,
AccessSpecifier AS,
RecordDecl *Record,
const PrintingPolicy &Policy) {
DeclContext *Owner = Record->getDeclContext();
// Diagnose whether this anonymous struct/union is an extension.
if (Record->isUnion() && !getLangOpts().CPlusPlus && !getLangOpts().C11)
Diag(Record->getLocation(), diag::ext_anonymous_union);
else if (!Record->isUnion() && getLangOpts().CPlusPlus)
Diag(Record->getLocation(), diag::ext_gnu_anonymous_struct);
else if (!Record->isUnion() && !getLangOpts().C11)
Diag(Record->getLocation(), diag::ext_c11_anonymous_struct);
// C and C++ require different kinds of checks for anonymous
// structs/unions.
bool Invalid = false;
if (getLangOpts().CPlusPlus) {
const char *PrevSpec = nullptr;
if (Record->isUnion()) {
// C++ [class.union]p6:
// C++17 [class.union.anon]p2:
// Anonymous unions declared in a named namespace or in the
// global namespace shall be declared static.
unsigned DiagID;
DeclContext *OwnerScope = Owner->getRedeclContext();
if (DS.getStorageClassSpec() != DeclSpec::SCS_static &&
(OwnerScope->isTranslationUnit() ||
(OwnerScope->isNamespace() &&
!cast<NamespaceDecl>(OwnerScope)->isAnonymousNamespace()))) {
Diag(Record->getLocation(), diag::err_anonymous_union_not_static)
<< FixItHint::CreateInsertion(Record->getLocation(), "static ");
// Recover by adding 'static'.
DS.SetStorageClassSpec(*this, DeclSpec::SCS_static, SourceLocation(),
PrevSpec, DiagID, Policy);
}
// C++ [class.union]p6:
// A storage class is not allowed in a declaration of an
// anonymous union in a class scope.
else if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified &&
isa<RecordDecl>(Owner)) {
Diag(DS.getStorageClassSpecLoc(),
diag::err_anonymous_union_with_storage_spec)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
// Recover by removing the storage specifier.
DS.SetStorageClassSpec(*this, DeclSpec::SCS_unspecified,
SourceLocation(),
PrevSpec, DiagID, Context.getPrintingPolicy());
}
}
// Ignore const/volatile/restrict qualifiers.
if (DS.getTypeQualifiers()) {
if (DS.getTypeQualifiers() & DeclSpec::TQ_const)
Diag(DS.getConstSpecLoc(), diag::ext_anonymous_struct_union_qualified)
<< Record->isUnion() << "const"
<< FixItHint::CreateRemoval(DS.getConstSpecLoc());
if (DS.getTypeQualifiers() & DeclSpec::TQ_volatile)
Diag(DS.getVolatileSpecLoc(),
diag::ext_anonymous_struct_union_qualified)
<< Record->isUnion() << "volatile"
<< FixItHint::CreateRemoval(DS.getVolatileSpecLoc());
if (DS.getTypeQualifiers() & DeclSpec::TQ_restrict)
Diag(DS.getRestrictSpecLoc(),
diag::ext_anonymous_struct_union_qualified)
<< Record->isUnion() << "restrict"
<< FixItHint::CreateRemoval(DS.getRestrictSpecLoc());
if (DS.getTypeQualifiers() & DeclSpec::TQ_atomic)
Diag(DS.getAtomicSpecLoc(),
diag::ext_anonymous_struct_union_qualified)
<< Record->isUnion() << "_Atomic"
<< FixItHint::CreateRemoval(DS.getAtomicSpecLoc());
if (DS.getTypeQualifiers() & DeclSpec::TQ_unaligned)
Diag(DS.getUnalignedSpecLoc(),
diag::ext_anonymous_struct_union_qualified)
<< Record->isUnion() << "__unaligned"
<< FixItHint::CreateRemoval(DS.getUnalignedSpecLoc());
DS.ClearTypeQualifiers();
}
// C++ [class.union]p2:
// The member-specification of an anonymous union shall only
// define non-static data members. [Note: nested types and
// functions cannot be declared within an anonymous union. ]
for (auto *Mem : Record->decls()) {
// Ignore invalid declarations; we already diagnosed them.
if (Mem->isInvalidDecl())
continue;
if (auto *FD = dyn_cast<FieldDecl>(Mem)) {
// C++ [class.union]p3:
// An anonymous union shall not have private or protected
// members (clause 11).
assert(FD->getAccess() != AS_none);
if (FD->getAccess() != AS_public) {
Diag(FD->getLocation(), diag::err_anonymous_record_nonpublic_member)
<< Record->isUnion() << (FD->getAccess() == AS_protected);
Invalid = true;
}
// C++ [class.union]p1
// An object of a class with a non-trivial constructor, a non-trivial
// copy constructor, a non-trivial destructor, or a non-trivial copy
// assignment operator cannot be a member of a union, nor can an
// array of such objects.
if (CheckNontrivialField(FD))
Invalid = true;
} else if (Mem->isImplicit()) {
// Any implicit members are fine.
} else if (isa<TagDecl>(Mem) && Mem->getDeclContext() != Record) {
// This is a type that showed up in an
// elaborated-type-specifier inside the anonymous struct or
// union, but which actually declares a type outside of the
// anonymous struct or union. It's okay.
} else if (auto *MemRecord = dyn_cast<RecordDecl>(Mem)) {
if (!MemRecord->isAnonymousStructOrUnion() &&
MemRecord->getDeclName()) {
// Visual C++ allows type definition in anonymous struct or union.
if (getLangOpts().MicrosoftExt)
Diag(MemRecord->getLocation(), diag::ext_anonymous_record_with_type)
<< Record->isUnion();
else {
// This is a nested type declaration.
Diag(MemRecord->getLocation(), diag::err_anonymous_record_with_type)
<< Record->isUnion();
Invalid = true;
}
} else {
// This is an anonymous type definition within another anonymous type.
// This is a popular extension, provided by Plan9, MSVC and GCC, but
// not part of standard C++.
Diag(MemRecord->getLocation(),
diag::ext_anonymous_record_with_anonymous_type)
<< Record->isUnion();
}
} else if (isa<AccessSpecDecl>(Mem)) {
// Any access specifier is fine.
} else if (isa<StaticAssertDecl>(Mem)) {
// In C++1z, static_assert declarations are also fine.
} else {
// We have something that isn't a non-static data
// member. Complain about it.
unsigned DK = diag::err_anonymous_record_bad_member;
if (isa<TypeDecl>(Mem))
DK = diag::err_anonymous_record_with_type;
else if (isa<FunctionDecl>(Mem))
DK = diag::err_anonymous_record_with_function;
else if (isa<VarDecl>(Mem))
DK = diag::err_anonymous_record_with_static;
// Visual C++ allows type definition in anonymous struct or union.
if (getLangOpts().MicrosoftExt &&
DK == diag::err_anonymous_record_with_type)
Diag(Mem->getLocation(), diag::ext_anonymous_record_with_type)
<< Record->isUnion();
else {
Diag(Mem->getLocation(), DK) << Record->isUnion();
Invalid = true;
}
}
}
// C++11 [class.union]p8 (DR1460):
// At most one variant member of a union may have a
// brace-or-equal-initializer.
if (cast<CXXRecordDecl>(Record)->hasInClassInitializer() &&
Owner->isRecord())
checkDuplicateDefaultInit(*this, cast<CXXRecordDecl>(Owner),
cast<CXXRecordDecl>(Record));
}
if (!Record->isUnion() && !Owner->isRecord()) {
Diag(Record->getLocation(), diag::err_anonymous_struct_not_member)
<< getLangOpts().CPlusPlus;
Invalid = true;
}
// C++ [dcl.dcl]p3:
// [If there are no declarators], and except for the declaration of an
// unnamed bit-field, the decl-specifier-seq shall introduce one or more
// names into the program
// C++ [class.mem]p2:
// each such member-declaration shall either declare at least one member
// name of the class or declare at least one unnamed bit-field
//
// For C this is an error even for a named struct, and is diagnosed elsewhere.
if (getLangOpts().CPlusPlus && Record->field_empty())
Diag(DS.getBeginLoc(), diag::ext_no_declarators) << DS.getSourceRange();
// Mock up a declarator.
Declarator Dc(DS, ParsedAttributesView::none(), DeclaratorContext::Member);
TypeSourceInfo *TInfo = GetTypeForDeclarator(Dc, S);
assert(TInfo && "couldn't build declarator info for anonymous struct/union");
// Create a declaration for this anonymous struct/union.
NamedDecl *Anon = nullptr;
if (RecordDecl *OwningClass = dyn_cast<RecordDecl>(Owner)) {
Anon = FieldDecl::Create(
Context, OwningClass, DS.getBeginLoc(), Record->getLocation(),
/*IdentifierInfo=*/nullptr, Context.getTypeDeclType(Record), TInfo,
/*BitWidth=*/nullptr, /*Mutable=*/false,
/*InitStyle=*/ICIS_NoInit);
Anon->setAccess(AS);
ProcessDeclAttributes(S, Anon, Dc);
if (getLangOpts().CPlusPlus)
FieldCollector->Add(cast<FieldDecl>(Anon));
} else {
DeclSpec::SCS SCSpec = DS.getStorageClassSpec();
StorageClass SC = StorageClassSpecToVarDeclStorageClass(DS);
if (SCSpec == DeclSpec::SCS_mutable) {
// mutable can only appear on non-static class members, so it's always
// an error here
Diag(Record->getLocation(), diag::err_mutable_nonmember);
Invalid = true;
SC = SC_None;
}
assert(DS.getAttributes().empty() && "No attribute expected");
Anon = VarDecl::Create(Context, Owner, DS.getBeginLoc(),
Record->getLocation(), /*IdentifierInfo=*/nullptr,
Context.getTypeDeclType(Record), TInfo, SC);
// Default-initialize the implicit variable. This initialization will be
// trivial in almost all cases, except if a union member has an in-class
// initializer:
// union { int n = 0; };
ActOnUninitializedDecl(Anon);
}
Anon->setImplicit();
// Mark this as an anonymous struct/union type.
Record->setAnonymousStructOrUnion(true);
// Add the anonymous struct/union object to the current
// context. We'll be referencing this object when we refer to one of
// its members.
Owner->addDecl(Anon);
// Inject the members of the anonymous struct/union into the owning
// context and into the identifier resolver chain for name lookup
// purposes.
SmallVector<NamedDecl*, 2> Chain;
Chain.push_back(Anon);
if (InjectAnonymousStructOrUnionMembers(*this, S, Owner, Record, AS, Chain))
Invalid = true;
if (VarDecl *NewVD = dyn_cast<VarDecl>(Anon)) {
if (getLangOpts().CPlusPlus && NewVD->isStaticLocal()) {
MangleNumberingContext *MCtx;
Decl *ManglingContextDecl;
std::tie(MCtx, ManglingContextDecl) =
getCurrentMangleNumberContext(NewVD->getDeclContext());
if (MCtx) {
Context.setManglingNumber(
NewVD, MCtx->getManglingNumber(
NewVD, getMSManglingNumber(getLangOpts(), S)));
Context.setStaticLocalNumber(NewVD, MCtx->getStaticLocalNumber(NewVD));
}
}
}
if (Invalid)
Anon->setInvalidDecl();
return Anon;
}
/// BuildMicrosoftCAnonymousStruct - Handle the declaration of a
/// Microsoft C anonymous structure.
/// Ref: http://msdn.microsoft.com/en-us/library/z2cx9y4f.aspx
/// Example:
///
/// struct A { int a; };
/// struct B { struct A; int b; };
///
/// void foo() {
/// B var;
/// var.a = 3;
/// }
///
Decl *Sema::BuildMicrosoftCAnonymousStruct(Scope *S, DeclSpec &DS,
RecordDecl *Record) {
assert(Record && "expected a record!");
// Mock up a declarator.
Declarator Dc(DS, ParsedAttributesView::none(), DeclaratorContext::TypeName);
TypeSourceInfo *TInfo = GetTypeForDeclarator(Dc, S);
assert(TInfo && "couldn't build declarator info for anonymous struct");
auto *ParentDecl = cast<RecordDecl>(CurContext);
QualType RecTy = Context.getTypeDeclType(Record);
// Create a declaration for this anonymous struct.
NamedDecl *Anon =
FieldDecl::Create(Context, ParentDecl, DS.getBeginLoc(), DS.getBeginLoc(),
/*IdentifierInfo=*/nullptr, RecTy, TInfo,
/*BitWidth=*/nullptr, /*Mutable=*/false,
/*InitStyle=*/ICIS_NoInit);
Anon->setImplicit();
// Add the anonymous struct object to the current context.
CurContext->addDecl(Anon);
// Inject the members of the anonymous struct into the current
// context and into the identifier resolver chain for name lookup
// purposes.
SmallVector<NamedDecl*, 2> Chain;
Chain.push_back(Anon);
RecordDecl *RecordDef = Record->getDefinition();
if (RequireCompleteSizedType(Anon->getLocation(), RecTy,
diag::err_field_incomplete_or_sizeless) ||
InjectAnonymousStructOrUnionMembers(*this, S, CurContext, RecordDef,
AS_none, Chain)) {
Anon->setInvalidDecl();
ParentDecl->setInvalidDecl();
}
return Anon;
}
/// GetNameForDeclarator - Determine the full declaration name for the
/// given Declarator.
DeclarationNameInfo Sema::GetNameForDeclarator(Declarator &D) {
return GetNameFromUnqualifiedId(D.getName());
}
/// Retrieves the declaration name from a parsed unqualified-id.
DeclarationNameInfo
Sema::GetNameFromUnqualifiedId(const UnqualifiedId &Name) {
DeclarationNameInfo NameInfo;
NameInfo.setLoc(Name.StartLocation);
switch (Name.getKind()) {
case UnqualifiedIdKind::IK_ImplicitSelfParam:
case UnqualifiedIdKind::IK_Identifier:
NameInfo.setName(Name.Identifier);
return NameInfo;
case UnqualifiedIdKind::IK_DeductionGuideName: {
// C++ [temp.deduct.guide]p3:
// The simple-template-id shall name a class template specialization.
// The template-name shall be the same identifier as the template-name
// of the simple-template-id.
// These together intend to imply that the template-name shall name a
// class template.
// FIXME: template<typename T> struct X {};
// template<typename T> using Y = X<T>;
// Y(int) -> Y<int>;
// satisfies these rules but does not name a class template.
TemplateName TN = Name.TemplateName.get().get();
auto *Template = TN.getAsTemplateDecl();
if (!Template || !isa<ClassTemplateDecl>(Template)) {
Diag(Name.StartLocation,
diag::err_deduction_guide_name_not_class_template)
<< (int)getTemplateNameKindForDiagnostics(TN) << TN;
if (Template)
Diag(Template->getLocation(), diag::note_template_decl_here);
return DeclarationNameInfo();
}
NameInfo.setName(
Context.DeclarationNames.getCXXDeductionGuideName(Template));
return NameInfo;
}
case UnqualifiedIdKind::IK_OperatorFunctionId:
NameInfo.setName(Context.DeclarationNames.getCXXOperatorName(
Name.OperatorFunctionId.Operator));
NameInfo.setCXXOperatorNameRange(SourceRange(
Name.OperatorFunctionId.SymbolLocations[0], Name.EndLocation));
return NameInfo;
case UnqualifiedIdKind::IK_LiteralOperatorId:
NameInfo.setName(Context.DeclarationNames.getCXXLiteralOperatorName(
Name.Identifier));
NameInfo.setCXXLiteralOperatorNameLoc(Name.EndLocation);
return NameInfo;
case UnqualifiedIdKind::IK_ConversionFunctionId: {
TypeSourceInfo *TInfo;
QualType Ty = GetTypeFromParser(Name.ConversionFunctionId, &TInfo);
if (Ty.isNull())
return DeclarationNameInfo();
NameInfo.setName(Context.DeclarationNames.getCXXConversionFunctionName(
Context.getCanonicalType(Ty)));
NameInfo.setNamedTypeInfo(TInfo);
return NameInfo;
}
case UnqualifiedIdKind::IK_ConstructorName: {
TypeSourceInfo *TInfo;
QualType Ty = GetTypeFromParser(Name.ConstructorName, &TInfo);
if (Ty.isNull())
return DeclarationNameInfo();
NameInfo.setName(Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(Ty)));
NameInfo.setNamedTypeInfo(TInfo);
return NameInfo;
}
case UnqualifiedIdKind::IK_ConstructorTemplateId: {
// In well-formed code, we can only have a constructor
// template-id that refers to the current context, so go there
// to find the actual type being constructed.
CXXRecordDecl *CurClass = dyn_cast<CXXRecordDecl>(CurContext);
if (!CurClass || CurClass->getIdentifier() != Name.TemplateId->Name)
return DeclarationNameInfo();
// Determine the type of the class being constructed.
QualType CurClassType = Context.getTypeDeclType(CurClass);
// FIXME: Check two things: that the template-id names the same type as
// CurClassType, and that the template-id does not occur when the name
// was qualified.
NameInfo.setName(Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(CurClassType)));
// FIXME: should we retrieve TypeSourceInfo?
NameInfo.setNamedTypeInfo(nullptr);
return NameInfo;
}
case UnqualifiedIdKind::IK_DestructorName: {
TypeSourceInfo *TInfo;
QualType Ty = GetTypeFromParser(Name.DestructorName, &TInfo);
if (Ty.isNull())
return DeclarationNameInfo();
NameInfo.setName(Context.DeclarationNames.getCXXDestructorName(
Context.getCanonicalType(Ty)));
NameInfo.setNamedTypeInfo(TInfo);
return NameInfo;
}
case UnqualifiedIdKind::IK_TemplateId: {
TemplateName TName = Name.TemplateId->Template.get();
SourceLocation TNameLoc = Name.TemplateId->TemplateNameLoc;
return Context.getNameForTemplate(TName, TNameLoc);
}
} // switch (Name.getKind())
llvm_unreachable("Unknown name kind");
}
static QualType getCoreType(QualType Ty) {
do {
if (Ty->isPointerType() || Ty->isReferenceType())
Ty = Ty->getPointeeType();
else if (Ty->isArrayType())
Ty = Ty->castAsArrayTypeUnsafe()->getElementType();
else
return Ty.withoutLocalFastQualifiers();
} while (true);
}
/// hasSimilarParameters - Determine whether the C++ functions Declaration
/// and Definition have "nearly" matching parameters. This heuristic is
/// used to improve diagnostics in the case where an out-of-line function
/// definition doesn't match any declaration within the class or namespace.
/// Also sets Params to the list of indices to the parameters that differ
/// between the declaration and the definition. If hasSimilarParameters
/// returns true and Params is empty, then all of the parameters match.
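///
/// For example (an illustrative sketch):
/// @code
/// struct S { void f(const int *p); };
/// void S::f(int p) {}  // no declaration matches exactly, but parameter 0
///                      // shares the core type 'int', so this heuristic
///                      // treats the declaration above as a near miss
/// @endcode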
static bool hasSimilarParameters(ASTContext &Context,
FunctionDecl *Declaration,
FunctionDecl *Definition,
SmallVectorImpl<unsigned> &Params) {
Params.clear();
if (Declaration->param_size() != Definition->param_size())
return false;
for (unsigned Idx = 0; Idx < Declaration->param_size(); ++Idx) {
QualType DeclParamTy = Declaration->getParamDecl(Idx)->getType();
QualType DefParamTy = Definition->getParamDecl(Idx)->getType();
// The parameter types are identical
if (Context.hasSameUnqualifiedType(DefParamTy, DeclParamTy))
continue;
QualType DeclParamBaseTy = getCoreType(DeclParamTy);
QualType DefParamBaseTy = getCoreType(DefParamTy);
const IdentifierInfo *DeclTyName = DeclParamBaseTy.getBaseTypeIdentifier();
const IdentifierInfo *DefTyName = DefParamBaseTy.getBaseTypeIdentifier();
if (Context.hasSameUnqualifiedType(DeclParamBaseTy, DefParamBaseTy) ||
(DeclTyName && DeclTyName == DefTyName))
Params.push_back(Idx);
else // The two parameters aren't even close
return false;
}
return true;
}
/// RebuildDeclaratorInCurrentInstantiation - Checks whether the given
/// declarator needs to be rebuilt in the current instantiation.
/// Any bits of declarator which appear before the name are valid for
/// consideration here. That's specifically the type in the decl spec
/// and the base type in any member-pointer chunks.
static bool RebuildDeclaratorInCurrentInstantiation(Sema &S, Declarator &D,
DeclarationName Name) {
// The types we specifically need to rebuild are:
// - typenames, typeofs, and decltypes
// - types which will become injected class names
// Of course, we also need to rebuild any type referencing such a
// type. It's safest to just say "dependent", but we call out a
// few cases here.
DeclSpec &DS = D.getMutableDeclSpec();
switch (DS.getTypeSpecType()) {
case DeclSpec::TST_typename:
case DeclSpec::TST_typeofType:
case DeclSpec::TST_typeof_unqualType:
#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case DeclSpec::TST_##Trait:
#include "clang/Basic/TransformTypeTraits.def"
case DeclSpec::TST_atomic: {
// Grab the type from the parser.
TypeSourceInfo *TSI = nullptr;
QualType T = S.GetTypeFromParser(DS.getRepAsType(), &TSI);
if (T.isNull() || !T->isInstantiationDependentType()) break;
// Make sure there's a type source info. This isn't really much
// of a waste; most dependent types should have type source info
// attached already.
if (!TSI)
TSI = S.Context.getTrivialTypeSourceInfo(T, DS.getTypeSpecTypeLoc());
// Rebuild the type in the current instantiation.
TSI = S.RebuildTypeInCurrentInstantiation(TSI, D.getIdentifierLoc(), Name);
if (!TSI) return true;
// Store the new type back in the decl spec.
ParsedType LocType = S.CreateParsedType(TSI->getType(), TSI);
DS.UpdateTypeRep(LocType);
break;
}
case DeclSpec::TST_decltype:
case DeclSpec::TST_typeof_unqualExpr:
case DeclSpec::TST_typeofExpr: {
Expr *E = DS.getRepAsExpr();
ExprResult Result = S.RebuildExprInCurrentInstantiation(E);
if (Result.isInvalid()) return true;
DS.UpdateExprRep(Result.get());
break;
}
default:
// Nothing to do for these decl specs.
break;
}
// It doesn't matter what order we do this in.
for (unsigned I = 0, E = D.getNumTypeObjects(); I != E; ++I) {
DeclaratorChunk &Chunk = D.getTypeObject(I);
// The only type information in the declarator which can come
// before the declaration name is the base type of a member
// pointer.
if (Chunk.Kind != DeclaratorChunk::MemberPointer)
continue;
// Rebuild the scope specifier in-place.
CXXScopeSpec &SS = Chunk.Mem.Scope();
if (S.RebuildNestedNameSpecifierInCurrentInstantiation(SS))
return true;
}
return false;
}
/// Returns true if the declaration is declared in a system header or from a
/// system macro.
static bool isFromSystemHeader(SourceManager &SM, const Decl *D) {
return SM.isInSystemHeader(D->getLocation()) ||
SM.isInSystemMacro(D->getLocation());
}
void Sema::warnOnReservedIdentifier(const NamedDecl *D) {
// Avoid warning twice on the same identifier, and don't warn on redeclaration
// of a system decl.
if (D->getPreviousDecl() || D->isImplicit())
return;
ReservedIdentifierStatus Status = D->isReserved(getLangOpts());
if (Status != ReservedIdentifierStatus::NotReserved &&
!isFromSystemHeader(Context.getSourceManager(), D)) {
Diag(D->getLocation(), diag::warn_reserved_extern_symbol)
<< D << static_cast<int>(Status);
}
}
Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) {
D.setFunctionDefinitionKind(FunctionDefinitionKind::Declaration);
// Check if we are in an `omp begin/end declare variant` scope. Handle this
// declaration only if the `bind_to_declaration` extension is set.
SmallVector<FunctionDecl *, 4> Bases;
if (LangOpts.OpenMP && isInOpenMPDeclareVariantScope())
if (getOMPTraitInfoForSurroundingScope()->isExtensionActive(llvm::omp::TraitProperty::
implementation_extension_bind_to_declaration))
ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(
S, D, MultiTemplateParamsArg(), Bases);
Decl *Dcl = HandleDeclarator(S, D, MultiTemplateParamsArg());
if (OriginalLexicalContext && OriginalLexicalContext->isObjCContainer() &&
Dcl && Dcl->getDeclContext()->isFileContext())
Dcl->setTopLevelDeclInObjCContainer();
if (!Bases.empty())
ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(Dcl, Bases);
return Dcl;
}
/// DiagnoseClassNameShadow - Implement C++ [class.mem]p13:
/// If T is the name of a class, then each of the following shall have a
/// name different from T:
/// - every static data member of class T;
/// - every member function of class T
/// - every member of class T that is itself a type;
/// \returns true if the declaration name violates these rules.
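///
/// For example:
/// @code
/// struct T {
///   int T;  // error: member 'T' has the same name as its class
/// };
/// @endcode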
bool Sema::DiagnoseClassNameShadow(DeclContext *DC,
DeclarationNameInfo NameInfo) {
DeclarationName Name = NameInfo.getName();
CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC);
while (Record && Record->isAnonymousStructOrUnion())
Record = dyn_cast<CXXRecordDecl>(Record->getParent());
if (Record && Record->getIdentifier() && Record->getDeclName() == Name) {
Diag(NameInfo.getLoc(), diag::err_member_name_of_class) << Name;
return true;
}
return false;
}
/// Diagnose a declaration whose declarator-id has the given
/// nested-name-specifier.
///
/// \param SS The nested-name-specifier of the declarator-id.
///
/// \param DC The declaration context to which the nested-name-specifier
/// resolves.
///
/// \param Name The name of the entity being declared.
///
/// \param Loc The location of the name of the entity being declared.
///
/// \param IsTemplateId Whether the name is a (simple-)template-id, and thus
/// we're declaring an explicit / partial specialization / instantiation.
///
/// \returns true if we cannot safely recover from this error, false otherwise.
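///
/// For example (an illustrative sketch of one of the cases below):
/// @code
/// namespace N { void f(); }
/// void g() {
///   void N::f();  // error: definition or redeclaration of 'f' not allowed
///                 // inside a function
/// }
/// @endcode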
bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC,
DeclarationName Name,
SourceLocation Loc, bool IsTemplateId) {
DeclContext *Cur = CurContext;
while (isa<LinkageSpecDecl>(Cur) || isa<CapturedDecl>(Cur))
Cur = Cur->getParent();
// If the user provided a superfluous scope specifier that refers back to the
// class in which the entity is already declared, diagnose and ignore it.
//
// class X {
// void X::f();
// };
//
// Note, it was once ill-formed to give redundant qualification in all
// contexts, but that rule was removed by DR482.
if (Cur->Equals(DC)) {
if (Cur->isRecord()) {
Diag(Loc, LangOpts.MicrosoftExt ? diag::warn_member_extra_qualification
: diag::err_member_extra_qualification)
<< Name << FixItHint::CreateRemoval(SS.getRange());
SS.clear();
} else {
Diag(Loc, diag::warn_namespace_member_extra_qualification) << Name;
}
return false;
}
// Check whether the qualifying scope encloses the scope of the original
// declaration. For a template-id, we perform the checks in
// CheckTemplateSpecializationScope.
if (!Cur->Encloses(DC) && !IsTemplateId) {
if (Cur->isRecord())
Diag(Loc, diag::err_member_qualification)
<< Name << SS.getRange();
else if (isa<TranslationUnitDecl>(DC))
Diag(Loc, diag::err_invalid_declarator_global_scope)
<< Name << SS.getRange();
else if (isa<FunctionDecl>(Cur))
Diag(Loc, diag::err_invalid_declarator_in_function)
<< Name << SS.getRange();
else if (isa<BlockDecl>(Cur))
Diag(Loc, diag::err_invalid_declarator_in_block)
<< Name << SS.getRange();
else if (isa<ExportDecl>(Cur)) {
if (!isa<NamespaceDecl>(DC))
Diag(Loc, diag::err_export_non_namespace_scope_name)
<< Name << SS.getRange();
else
// Cases where DC is not a NamespaceDecl should be handled in
// CheckRedeclarationExported.
return false;
} else
Diag(Loc, diag::err_invalid_declarator_scope)
<< Name << cast<NamedDecl>(Cur) << cast<NamedDecl>(DC) << SS.getRange();
return true;
}
if (Cur->isRecord()) {
// Cannot qualify members within a class.
Diag(Loc, diag::err_member_qualification)
<< Name << SS.getRange();
SS.clear();
// C++ constructors and destructors with incorrect scopes can break
// our AST invariants by having the wrong underlying types. If
// that's the case, then drop this declaration entirely.
if ((Name.getNameKind() == DeclarationName::CXXConstructorName ||
Name.getNameKind() == DeclarationName::CXXDestructorName) &&
!Context.hasSameType(Name.getCXXNameType(),
Context.getTypeDeclType(cast<CXXRecordDecl>(Cur))))
return true;
return false;
}
// C++11 [dcl.meaning]p1:
// [...] "The nested-name-specifier of the qualified declarator-id shall
// not begin with a decltype-specifier"
NestedNameSpecifierLoc SpecLoc(SS.getScopeRep(), SS.location_data());
while (SpecLoc.getPrefix())
SpecLoc = SpecLoc.getPrefix();
if (isa_and_nonnull<DecltypeType>(
SpecLoc.getNestedNameSpecifier()->getAsType()))
Diag(Loc, diag::err_decltype_in_declarator)
<< SpecLoc.getTypeLoc().getSourceRange();
return false;
}
NamedDecl *Sema::HandleDeclarator(Scope *S, Declarator &D,
MultiTemplateParamsArg TemplateParamLists) {
// TODO: consider using NameInfo for diagnostic.
DeclarationNameInfo NameInfo = GetNameForDeclarator(D);
DeclarationName Name = NameInfo.getName();
// All of these full declarators require an identifier. If it doesn't have
// one, the ParsedFreeStandingDeclSpec action should be used.
if (D.isDecompositionDeclarator()) {
return ActOnDecompositionDeclarator(S, D, TemplateParamLists);
} else if (!Name) {
if (!D.isInvalidType()) // Reject this if we think it is valid.
Diag(D.getDeclSpec().getBeginLoc(), diag::err_declarator_need_ident)
<< D.getDeclSpec().getSourceRange() << D.getSourceRange();
return nullptr;
} else if (DiagnoseUnexpandedParameterPack(NameInfo, UPPC_DeclarationType))
return nullptr;
// The scope passed in may not be a decl scope. Zip up the scope tree until
// we find one that is.
while ((S->getFlags() & Scope::DeclScope) == 0 ||
(S->getFlags() & Scope::TemplateParamScope) != 0)
S = S->getParent();
DeclContext *DC = CurContext;
if (D.getCXXScopeSpec().isInvalid())
D.setInvalidType();
else if (D.getCXXScopeSpec().isSet()) {
if (DiagnoseUnexpandedParameterPack(D.getCXXScopeSpec(),
UPPC_DeclarationQualifier))
return nullptr;
bool EnteringContext = !D.getDeclSpec().isFriendSpecified();
DC = computeDeclContext(D.getCXXScopeSpec(), EnteringContext);
if (!DC || isa<EnumDecl>(DC)) {
// If we could not compute the declaration context, it's because the
// declaration context is dependent but does not refer to a class,
// class template, or class template partial specialization. Complain
// and return early, to avoid the coming semantic disaster.
Diag(D.getIdentifierLoc(),
diag::err_template_qualified_declarator_no_match)
<< D.getCXXScopeSpec().getScopeRep()
<< D.getCXXScopeSpec().getRange();
return nullptr;
}
bool IsDependentContext = DC->isDependentContext();
if (!IsDependentContext &&
RequireCompleteDeclContext(D.getCXXScopeSpec(), DC))
return nullptr;
// If a class is incomplete, do not parse entities inside it.
if (isa<CXXRecordDecl>(DC) && !cast<CXXRecordDecl>(DC)->hasDefinition()) {
Diag(D.getIdentifierLoc(),
diag::err_member_def_undefined_record)
<< Name << DC << D.getCXXScopeSpec().getRange();
return nullptr;
}
if (!D.getDeclSpec().isFriendSpecified()) {
if (diagnoseQualifiedDeclaration(
D.getCXXScopeSpec(), DC, Name, D.getIdentifierLoc(),
D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId)) {
if (DC->isRecord())
return nullptr;
D.setInvalidType();
}
}
// Check whether we need to rebuild the type of the given
// declaration in the current instantiation.
if (EnteringContext && IsDependentContext &&
TemplateParamLists.size() != 0) {
ContextRAII SavedContext(*this, DC);
if (RebuildDeclaratorInCurrentInstantiation(*this, D, Name))
D.setInvalidType();
}
}
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType R = TInfo->getType();
if (DiagnoseUnexpandedParameterPack(D.getIdentifierLoc(), TInfo,
UPPC_DeclarationType))
D.setInvalidType();
LookupResult Previous(*this, NameInfo, LookupOrdinaryName,
forRedeclarationInCurContext());
// See if this is a redefinition of a variable in the same scope.
if (!D.getCXXScopeSpec().isSet()) {
bool IsLinkageLookup = false;
bool CreateBuiltins = false;
// If the declaration we're planning to build will be a function
// or object with linkage, then look for another declaration with
// linkage (C99 6.2.2p4-5 and C++ [basic.link]p6).
//
// If the declaration we're planning to build will be declared with
// external linkage in the translation unit, create any builtin with
// the same name.
if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef)
/* Do nothing*/;
else if (CurContext->isFunctionOrMethod() &&
(D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_extern ||
R->isFunctionType())) {
IsLinkageLookup = true;
CreateBuiltins =
CurContext->getEnclosingNamespaceContext()->isTranslationUnit();
} else if (CurContext->getRedeclContext()->isTranslationUnit() &&
D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static)
CreateBuiltins = true;
if (IsLinkageLookup) {
Previous.clear(LookupRedeclarationWithLinkage);
Previous.setRedeclarationKind(ForExternalRedeclaration);
}
LookupName(Previous, S, CreateBuiltins);
} else { // Something like "int foo::x;"
LookupQualifiedName(Previous, DC);
// C++ [dcl.meaning]p1:
// When the declarator-id is qualified, the declaration shall refer to a
// previously declared member of the class or namespace to which the
// qualifier refers (or, in the case of a namespace, of an element of the
// inline namespace set of that namespace (7.3.1)) or to a specialization
// thereof; [...]
//
// Note that we already checked the context above, and that we do not have
// enough information to make sure that Previous contains the declaration
// we want to match. For example, given:
//
// class X {
// void f();
// void f(float);
// };
//
// void X::f(int) { } // ill-formed
//
// In this case, Previous will point to the overload set
// containing the two f's declared in X, but neither of them
// matches.
// C++ [dcl.meaning]p1:
// [...] the member shall not merely have been introduced by a
// using-declaration in the scope of the class or namespace nominated by
// the nested-name-specifier of the declarator-id.
RemoveUsingDecls(Previous);
}
if (Previous.isSingleResult() &&
Previous.getFoundDecl()->isTemplateParameter()) {
// Maybe we will complain about the shadowed template parameter.
if (!D.isInvalidType())
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(),
Previous.getFoundDecl());
// Just pretend that we didn't see the previous declaration.
Previous.clear();
}
if (!R->isFunctionType() && DiagnoseClassNameShadow(DC, NameInfo))
// Forget that the previous declaration is the injected-class-name.
Previous.clear();
// In C++, the previous declaration we find might be a tag type
// (class or enum). In this case, the new declaration will hide the
// tag type. Note that this applies to functions, function templates, and
// variables, but not to typedefs (C++ [dcl.typedef]p4) or variable templates.
if (Previous.isSingleTagDecl() &&
D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef &&
(TemplateParamLists.size() == 0 || R->isFunctionType()))
Previous.clear();
// Check that there are no default arguments other than in the parameters
// of a function declaration (C++ only).
if (getLangOpts().CPlusPlus)
CheckExtraCXXDefaultArguments(D);
NamedDecl *New;
bool AddToScope = true;
if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef) {
if (TemplateParamLists.size()) {
Diag(D.getIdentifierLoc(), diag::err_template_typedef);
return nullptr;
}
New = ActOnTypedefDeclarator(S, D, DC, TInfo, Previous);
} else if (R->isFunctionType()) {
New = ActOnFunctionDeclarator(S, D, DC, TInfo, Previous,
TemplateParamLists,
AddToScope);
} else {
New = ActOnVariableDeclarator(S, D, DC, TInfo, Previous, TemplateParamLists,
AddToScope);
}
if (!New)
return nullptr;
// If this has an identifier and is not a function template specialization,
// add it to the scope stack.
if (New->getDeclName() && AddToScope)
PushOnScopeChains(New, S);
if (isInOpenMPDeclareTargetContext())
checkDeclIsAllowedInOpenMPTarget(nullptr, New);
return New;
}
/// Helper method to turn variable array types into constant array
/// types in certain situations which would otherwise be errors (for
/// GCC compatibility).
static QualType TryToFixInvalidVariablyModifiedType(QualType T,
ASTContext &Context,
bool &SizeIsNegative,
llvm::APSInt &Oversized) {
// This method tries to turn a variable array into a constant
// array even when the size isn't an ICE. This is necessary
// for compatibility with code that depends on gcc's buggy
// constant expression folding, like struct {char x[(int)(char*)2];}
SizeIsNegative = false;
Oversized = 0;
if (T->isDependentType())
return QualType();
QualifierCollector Qs;
const Type *Ty = Qs.strip(T);
if (const PointerType* PTy = dyn_cast<PointerType>(Ty)) {
QualType Pointee = PTy->getPointeeType();
QualType FixedType =
TryToFixInvalidVariablyModifiedType(Pointee, Context, SizeIsNegative,
Oversized);
if (FixedType.isNull()) return FixedType;
FixedType = Context.getPointerType(FixedType);
return Qs.apply(Context, FixedType);
}
if (const ParenType* PTy = dyn_cast<ParenType>(Ty)) {
QualType Inner = PTy->getInnerType();
QualType FixedType =
TryToFixInvalidVariablyModifiedType(Inner, Context, SizeIsNegative,
Oversized);
if (FixedType.isNull()) return FixedType;
FixedType = Context.getParenType(FixedType);
return Qs.apply(Context, FixedType);
}
const VariableArrayType* VLATy = dyn_cast<VariableArrayType>(T);
if (!VLATy)
return QualType();
QualType ElemTy = VLATy->getElementType();
if (ElemTy->isVariablyModifiedType()) {
ElemTy = TryToFixInvalidVariablyModifiedType(ElemTy, Context,
SizeIsNegative, Oversized);
if (ElemTy.isNull())
return QualType();
}
Expr::EvalResult Result;
if (!VLATy->getSizeExpr() ||
!VLATy->getSizeExpr()->EvaluateAsInt(Result, Context))
return QualType();
llvm::APSInt Res = Result.Val.getInt();
// Check whether the array size is negative.
if (Res.isSigned() && Res.isNegative()) {
SizeIsNegative = true;
return QualType();
}
// Check whether the array is too large to be addressed.
unsigned ActiveSizeBits =
(!ElemTy->isDependentType() && !ElemTy->isVariablyModifiedType() &&
!ElemTy->isIncompleteType() && !ElemTy->isUndeducedType())
? ConstantArrayType::getNumAddressingBits(Context, ElemTy, Res)
: Res.getActiveBits();
if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context)) {
Oversized = Res;
return QualType();
}
QualType FoldedArrayType = Context.getConstantArrayType(
ElemTy, Res, VLATy->getSizeExpr(), ArrayType::Normal, 0);
return Qs.apply(Context, FoldedArrayType);
}
static void
FixInvalidVariablyModifiedTypeLoc(TypeLoc SrcTL, TypeLoc DstTL) {
SrcTL = SrcTL.getUnqualifiedLoc();
DstTL = DstTL.getUnqualifiedLoc();
if (PointerTypeLoc SrcPTL = SrcTL.getAs<PointerTypeLoc>()) {
PointerTypeLoc DstPTL = DstTL.castAs<PointerTypeLoc>();
FixInvalidVariablyModifiedTypeLoc(SrcPTL.getPointeeLoc(),
DstPTL.getPointeeLoc());
DstPTL.setStarLoc(SrcPTL.getStarLoc());
return;
}
if (ParenTypeLoc SrcPTL = SrcTL.getAs<ParenTypeLoc>()) {
ParenTypeLoc DstPTL = DstTL.castAs<ParenTypeLoc>();
FixInvalidVariablyModifiedTypeLoc(SrcPTL.getInnerLoc(),
DstPTL.getInnerLoc());
DstPTL.setLParenLoc(SrcPTL.getLParenLoc());
DstPTL.setRParenLoc(SrcPTL.getRParenLoc());
return;
}
ArrayTypeLoc SrcATL = SrcTL.castAs<ArrayTypeLoc>();
ArrayTypeLoc DstATL = DstTL.castAs<ArrayTypeLoc>();
TypeLoc SrcElemTL = SrcATL.getElementLoc();
TypeLoc DstElemTL = DstATL.getElementLoc();
if (VariableArrayTypeLoc SrcElemATL =
SrcElemTL.getAs<VariableArrayTypeLoc>()) {
ConstantArrayTypeLoc DstElemATL = DstElemTL.castAs<ConstantArrayTypeLoc>();
FixInvalidVariablyModifiedTypeLoc(SrcElemATL, DstElemATL);
} else {
DstElemTL.initializeFullCopy(SrcElemTL);
}
DstATL.setLBracketLoc(SrcATL.getLBracketLoc());
DstATL.setSizeExpr(SrcATL.getSizeExpr());
DstATL.setRBracketLoc(SrcATL.getRBracketLoc());
}
/// Helper method to turn variable array types into constant array
/// types in certain situations which would otherwise be errors (for
/// GCC compatibility).
static TypeSourceInfo*
TryToFixInvalidVariablyModifiedTypeSourceInfo(TypeSourceInfo *TInfo,
ASTContext &Context,
bool &SizeIsNegative,
llvm::APSInt &Oversized) {
QualType FixedTy
= TryToFixInvalidVariablyModifiedType(TInfo->getType(), Context,
SizeIsNegative, Oversized);
if (FixedTy.isNull())
return nullptr;
TypeSourceInfo *FixedTInfo = Context.getTrivialTypeSourceInfo(FixedTy);
FixInvalidVariablyModifiedTypeLoc(TInfo->getTypeLoc(),
FixedTInfo->getTypeLoc());
return FixedTInfo;
}
/// Attempt to fold a variable-sized type to a constant-sized type, returning
/// true if we were successful.
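///
/// For example (reusing the GCC-compatibility case described above):
/// @code
/// struct S { char x[(int)(char*)2]; };  // the size is not an ICE, but it
///                                       // evaluates to 2, so the array is
///                                       // folded to char[2] with a warning
/// @endcode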
bool Sema::tryToFixVariablyModifiedVarType(TypeSourceInfo *&TInfo,
QualType &T, SourceLocation Loc,
unsigned FailedFoldDiagID) {
bool SizeIsNegative;
llvm::APSInt Oversized;
TypeSourceInfo *FixedTInfo = TryToFixInvalidVariablyModifiedTypeSourceInfo(
TInfo, Context, SizeIsNegative, Oversized);
if (FixedTInfo) {
Diag(Loc, diag::ext_vla_folded_to_constant);
TInfo = FixedTInfo;
T = FixedTInfo->getType();
return true;
}
if (SizeIsNegative)
Diag(Loc, diag::err_typecheck_negative_array_size);
else if (Oversized.getBoolValue())
Diag(Loc, diag::err_array_too_large) << toString(Oversized, 10);
else if (FailedFoldDiagID)
Diag(Loc, FailedFoldDiagID);
return false;
}
/// Register the given locally-scoped extern "C" declaration so
/// that it can be found later for redeclarations. We include any extern "C"
/// declaration that is not visible in the translation unit here, not just
/// function-scope declarations.
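///
/// For example, in C (an illustrative sketch):
/// @code
/// void f(void) {
///   extern int n;  // registered here so that later declarations of 'n' in
/// }                // other scopes can be checked against this one
/// @endcode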
void
Sema::RegisterLocallyScopedExternCDecl(NamedDecl *ND, Scope *S) {
if (!getLangOpts().CPlusPlus &&
ND->getLexicalDeclContext()->getRedeclContext()->isTranslationUnit())
// Don't need to track declarations in the TU in C.
return;
// Note that we have a locally-scoped external with this name.
Context.getExternCContextDecl()->makeDeclVisibleInContext(ND);
}
NamedDecl *Sema::findLocallyScopedExternCDecl(DeclarationName Name) {
// FIXME: We can have multiple results via __attribute__((overloadable)).
auto Result = Context.getExternCContextDecl()->lookup(Name);
return Result.empty() ? nullptr : *Result.begin();
}
/// Diagnose function specifiers on a declaration of an identifier that
/// does not identify a function.
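///
/// For example:
/// @code
/// struct S {
///   virtual int a;  // error: 'virtual' can only appear on non-static
///                   // member functions
/// };
/// @endcode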
void Sema::DiagnoseFunctionSpecifiers(const DeclSpec &DS) {
// FIXME: We should probably indicate the identifier in question to avoid
// confusion for constructs like "virtual int a(), b;"
if (DS.isVirtualSpecified())
Diag(DS.getVirtualSpecLoc(),
diag::err_virtual_non_function);
if (DS.hasExplicitSpecifier())
Diag(DS.getExplicitSpecLoc(),
diag::err_explicit_non_function);
if (DS.isNoreturnSpecified())
Diag(DS.getNoreturnSpecLoc(),
diag::err_noreturn_non_function);
}
NamedDecl*
Sema::ActOnTypedefDeclarator(Scope* S, Declarator& D, DeclContext* DC,
TypeSourceInfo *TInfo, LookupResult &Previous) {
// Typedef declarators cannot be qualified (C++ [dcl.meaning]p1).
if (D.getCXXScopeSpec().isSet()) {
Diag(D.getIdentifierLoc(), diag::err_qualified_typedef_declarator)
<< D.getCXXScopeSpec().getRange();
D.setInvalidType();
// Pretend we didn't see the scope specifier.
DC = CurContext;
Previous.clear();
}
DiagnoseFunctionSpecifiers(D.getDeclSpec());
if (D.getDeclSpec().isInlineSpecified())
Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
<< getLangOpts().CPlusPlus17;
if (D.getDeclSpec().hasConstexprSpecifier())
Diag(D.getDeclSpec().getConstexprSpecLoc(), diag::err_invalid_constexpr)
<< 1 << static_cast<int>(D.getDeclSpec().getConstexprSpecifier());
if (D.getName().getKind() != UnqualifiedIdKind::IK_Identifier) {
if (D.getName().getKind() == UnqualifiedIdKind::IK_DeductionGuideName)
Diag(D.getName().StartLocation,
diag::err_deduction_guide_invalid_specifier)
<< "typedef";
else
Diag(D.getName().StartLocation, diag::err_typedef_not_identifier)
<< D.getName().getSourceRange();
return nullptr;
}
TypedefDecl *NewTD = ParseTypedefDecl(S, D, TInfo->getType(), TInfo);
if (!NewTD) return nullptr;
// Handle attributes prior to checking for duplicates in MergeVarDecl
ProcessDeclAttributes(S, NewTD, D);
CheckTypedefForVariablyModifiedType(S, NewTD);
bool Redeclaration = D.isRedeclaration();
NamedDecl *ND = ActOnTypedefNameDecl(S, DC, NewTD, Previous, Redeclaration);
D.setRedeclaration(Redeclaration);
return ND;
}
void
Sema::CheckTypedefForVariablyModifiedType(Scope *S, TypedefNameDecl *NewTD) {
// C99 6.7.7p2: If a typedef name specifies a variably modified type
// then it shall have block scope.
// Note that variably modified types must be fixed before merging the decl so
// that redeclarations will match.
TypeSourceInfo *TInfo = NewTD->getTypeSourceInfo();
QualType T = TInfo->getType();
if (T->isVariablyModifiedType()) {
setFunctionHasBranchProtectedScope();
if (S->getFnParent() == nullptr) {
bool SizeIsNegative;
llvm::APSInt Oversized;
TypeSourceInfo *FixedTInfo =
TryToFixInvalidVariablyModifiedTypeSourceInfo(TInfo, Context,
SizeIsNegative,
Oversized);
if (FixedTInfo) {
Diag(NewTD->getLocation(), diag::ext_vla_folded_to_constant);
NewTD->setTypeSourceInfo(FixedTInfo);
} else {
if (SizeIsNegative)
Diag(NewTD->getLocation(), diag::err_typecheck_negative_array_size);
else if (T->isVariableArrayType())
Diag(NewTD->getLocation(), diag::err_vla_decl_in_file_scope);
else if (Oversized.getBoolValue())
Diag(NewTD->getLocation(), diag::err_array_too_large)
<< toString(Oversized, 10);
else
Diag(NewTD->getLocation(), diag::err_vm_decl_in_file_scope);
NewTD->setInvalidDecl();
}
}
}
}
/// ActOnTypedefNameDecl - Perform semantic checking for a declaration which
/// declares a typedef-name, either using the 'typedef' type specifier or via
/// a C++0x [dcl.typedef]p2 alias-declaration: 'using T = A;'.
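///
/// For example, both of the following declare the typedef-name 'Int':
/// @code
/// typedef int Int;
/// using Int = int;  // OK: redeclares 'Int' as the same type
/// @endcode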
NamedDecl*
Sema::ActOnTypedefNameDecl(Scope *S, DeclContext *DC, TypedefNameDecl *NewTD,
LookupResult &Previous, bool &Redeclaration) {
// Find the shadowed declaration before filtering for scope.
NamedDecl *ShadowedDecl = getShadowedDeclaration(NewTD, Previous);
// Merge the decl with the existing one if appropriate. If the decl is
// in an outer scope, it isn't the same thing.
FilterLookupForScope(Previous, DC, S, /*ConsiderLinkage*/false,
/*AllowInlineNamespace*/false);
filterNonConflictingPreviousTypedefDecls(*this, NewTD, Previous);
if (!Previous.empty()) {
Redeclaration = true;
MergeTypedefNameDecl(S, NewTD, Previous);
} else {
inferGslPointerAttribute(NewTD);
}
if (ShadowedDecl && !Redeclaration)
CheckShadow(NewTD, ShadowedDecl, Previous);
// If this is the C FILE type, notify the AST context.
if (IdentifierInfo *II = NewTD->getIdentifier())
if (!NewTD->isInvalidDecl() &&
NewTD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
if (II->isStr("FILE"))
Context.setFILEDecl(NewTD);
else if (II->isStr("jmp_buf"))
Context.setjmp_bufDecl(NewTD);
else if (II->isStr("sigjmp_buf"))
Context.setsigjmp_bufDecl(NewTD);
else if (II->isStr("ucontext_t"))
Context.setucontext_tDecl(NewTD);
}
return NewTD;
}
/// Determines whether the given declaration is an out-of-scope
/// previous declaration.
///
/// This routine should be invoked when name lookup has found a
/// previous declaration (PrevDecl) that is not in the scope where a
/// new declaration by the same name is being introduced. If the new
/// declaration occurs in a local scope, previous declarations with
/// linkage may still be considered previous declarations (C99
/// 6.2.2p4-5, C++ [basic.link]p6).
///
/// \param PrevDecl the previous declaration found by name
/// lookup
///
/// \param DC the context in which the new declaration is being
/// declared.
///
/// \returns true if PrevDecl is an out-of-scope previous declaration
/// for a new declaration with the same name.
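///
/// For example (an illustrative sketch):
/// @code
/// extern int n;    // #1: file scope, external linkage
/// void f() {
///   extern int n;  // #1 is the out-of-scope previous declaration
/// }
/// @endcode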
static bool
isOutOfScopePreviousDeclaration(NamedDecl *PrevDecl, DeclContext *DC,
ASTContext &Context) {
if (!PrevDecl)
return false;
if (!PrevDecl->hasLinkage())
return false;
if (Context.getLangOpts().CPlusPlus) {
// C++ [basic.link]p6:
// If there is a visible declaration of an entity with linkage
// having the same name and type, ignoring entities declared
// outside the innermost enclosing namespace scope, the block
// scope declaration declares that same entity and receives the
// linkage of the previous declaration.
DeclContext *OuterContext = DC->getRedeclContext();
if (!OuterContext->isFunctionOrMethod())
// This rule only applies to block-scope declarations.
return false;
DeclContext *PrevOuterContext = PrevDecl->getDeclContext();
if (PrevOuterContext->isRecord())
// We found a member function: ignore it.
return false;
// Find the innermost enclosing namespace for the new and
// previous declarations.
OuterContext = OuterContext->getEnclosingNamespaceContext();
PrevOuterContext = PrevOuterContext->getEnclosingNamespaceContext();
// The previous declaration is in a different namespace, so it
// isn't the same function.
if (!OuterContext->Equals(PrevOuterContext))
return false;
}
return true;
}
static void SetNestedNameSpecifier(Sema &S, DeclaratorDecl *DD, Declarator &D) {
CXXScopeSpec &SS = D.getCXXScopeSpec();
if (!SS.isSet()) return;
DD->setQualifierInfo(SS.getWithLocInContext(S.Context));
}
bool Sema::inferObjCARCLifetime(ValueDecl *decl) {
QualType type = decl->getType();
Qualifiers::ObjCLifetime lifetime = type.getObjCLifetime();
if (lifetime == Qualifiers::OCL_Autoreleasing) {
// Various kinds of declaration aren't allowed to be __autoreleasing.
unsigned kind = -1U;
if (VarDecl *var = dyn_cast<VarDecl>(decl)) {
if (var->hasAttr<BlocksAttr>())
kind = 0; // __block
else if (!var->hasLocalStorage())
kind = 1; // global
} else if (isa<ObjCIvarDecl>(decl)) {
kind = 3; // ivar
} else if (isa<FieldDecl>(decl)) {
kind = 2; // field
}
if (kind != -1U) {
Diag(decl->getLocation(), diag::err_arc_autoreleasing_var)
<< kind;
}
} else if (lifetime == Qualifiers::OCL_None) {
// Try to infer lifetime.
if (!type->isObjCLifetimeType())
return false;
lifetime = type->getObjCARCImplicitLifetime();
type = Context.getLifetimeQualifiedType(type, lifetime);
decl->setType(type);
}
if (VarDecl *var = dyn_cast<VarDecl>(decl)) {
// Thread-local variables cannot have lifetime.
if (lifetime && lifetime != Qualifiers::OCL_ExplicitNone &&
var->getTLSKind()) {
Diag(var->getLocation(), diag::err_arc_thread_ownership)
<< var->getType();
return true;
}
}
return false;
}
void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) {
if (Decl->getType().hasAddressSpace())
return;
if (Decl->getType()->isDependentType())
return;
if (VarDecl *Var = dyn_cast<VarDecl>(Decl)) {
QualType Type = Var->getType();
if (Type->isSamplerT() || Type->isVoidType())
return;
LangAS ImplAS = LangAS::opencl_private;
// OpenCL C v3.0 s6.7.8 - For OpenCL C 2.0 or with the
// __opencl_c_program_scope_global_variables feature, the address space
// for a variable at program scope or a static or extern variable inside
// a function is inferred to be __global.
if (getOpenCLOptions().areProgramScopeVariablesSupported(getLangOpts()) &&
Var->hasGlobalStorage())
ImplAS = LangAS::opencl_global;
// If the original type from a decayed type is an array type and that array
// type has no address space yet, deduce it now.
if (auto DT = dyn_cast<DecayedType>(Type)) {
auto OrigTy = DT->getOriginalType();
if (!OrigTy.hasAddressSpace() && OrigTy->isArrayType()) {
// Add the address space to the original array type and then propagate
// that to the element type through `getAsArrayType`.
OrigTy = Context.getAddrSpaceQualType(OrigTy, ImplAS);
OrigTy = QualType(Context.getAsArrayType(OrigTy), 0);
// Re-generate the decayed type.
Type = Context.getDecayedType(OrigTy);
}
}
Type = Context.getAddrSpaceQualType(Type, ImplAS);
// Apply any qualifiers (including address space) from the array type to
// the element type. This implements C99 6.7.3p8: "If the specification of
// an array type includes any type qualifiers, the element type is so
// qualified, not the array type."
if (Type->isArrayType())
Type = QualType(Context.getAsArrayType(Type), 0);
Decl->setType(Type);
}
}
static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) {
// Ensure that an auto decl is deduced otherwise the checks below might cache
// the wrong linkage.
assert(S.ParsingInitForAutoVars.count(&ND) == 0);
// 'weak' only applies to declarations with external linkage.
if (WeakAttr *Attr = ND.getAttr<WeakAttr>()) {
if (!ND.isExternallyVisible()) {
S.Diag(Attr->getLocation(), diag::err_attribute_weak_static);
ND.dropAttr<WeakAttr>();
}
}
if (WeakRefAttr *Attr = ND.getAttr<WeakRefAttr>()) {
if (ND.isExternallyVisible()) {
S.Diag(Attr->getLocation(), diag::err_attribute_weakref_not_static);
ND.dropAttr<WeakRefAttr>();
ND.dropAttr<AliasAttr>();
}
}
if (auto *VD = dyn_cast<VarDecl>(&ND)) {
if (VD->hasInit()) {
if (const auto *Attr = VD->getAttr<AliasAttr>()) {
assert(VD->isThisDeclarationADefinition() &&
!VD->isExternallyVisible() && "Broken AliasAttr handled late!");
S.Diag(Attr->getLocation(), diag::err_alias_is_definition) << VD << 0;
VD->dropAttr<AliasAttr>();
}
}
}
// 'selectany' only applies to externally visible variable declarations.
// It does not apply to functions.
if (SelectAnyAttr *Attr = ND.getAttr<SelectAnyAttr>()) {
if (isa<FunctionDecl>(ND) || !ND.isExternallyVisible()) {
S.Diag(Attr->getLocation(),
diag::err_attribute_selectany_non_extern_data);
ND.dropAttr<SelectAnyAttr>();
}
}
if (const InheritableAttr *Attr = getDLLAttr(&ND)) {
auto *VD = dyn_cast<VarDecl>(&ND);
bool IsAnonymousNS = false;
bool IsMicrosoft = S.Context.getTargetInfo().getCXXABI().isMicrosoft();
if (VD) {
const NamespaceDecl *NS = dyn_cast<NamespaceDecl>(VD->getDeclContext());
while (NS && !IsAnonymousNS) {
IsAnonymousNS = NS->isAnonymousNamespace();
NS = dyn_cast<NamespaceDecl>(NS->getParent());
}
}
// dll attributes require external linkage. Static locals may have external
// linkage but still cannot be explicitly imported or exported.
// In Microsoft mode, a variable defined in an anonymous namespace must have
// external linkage in order to be exported.
bool AnonNSInMicrosoftMode = IsAnonymousNS && IsMicrosoft;
if ((ND.isExternallyVisible() && AnonNSInMicrosoftMode) ||
(!AnonNSInMicrosoftMode &&
(!ND.isExternallyVisible() || (VD && VD->isStaticLocal())))) {
S.Diag(ND.getLocation(), diag::err_attribute_dll_not_extern)
<< &ND << Attr;
ND.setInvalidDecl();
}
}
// Check the attributes on the function type, if any.
if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) {
// Don't declare this variable in the second operand of the for-statement;
// GCC miscompiles that by ending its lifetime before evaluating the
// third operand. See gcc.gnu.org/PR86769.
AttributedTypeLoc ATL;
for (TypeLoc TL = FD->getTypeSourceInfo()->getTypeLoc();
(ATL = TL.getAsAdjusted<AttributedTypeLoc>());
TL = ATL.getModifiedLoc()) {
// The [[lifetimebound]] attribute can be applied to the implicit object
// parameter of a non-static member function (other than a ctor or dtor)
// by applying it to the function type.
if (const auto *A = ATL.getAttrAs<LifetimeBoundAttr>()) {
const auto *MD = dyn_cast<CXXMethodDecl>(FD);
if (!MD || MD->isStatic()) {
S.Diag(A->getLocation(), diag::err_lifetimebound_no_object_param)
<< !MD << A->getRange();
} else if (isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD)) {
S.Diag(A->getLocation(), diag::err_lifetimebound_ctor_dtor)
<< isa<CXXDestructorDecl>(MD) << A->getRange();
}
}
}
}
}
static void checkDLLAttributeRedeclaration(Sema &S, NamedDecl *OldDecl,
NamedDecl *NewDecl,
bool IsSpecialization,
bool IsDefinition) {
if (OldDecl->isInvalidDecl() || NewDecl->isInvalidDecl())
return;
bool IsTemplate = false;
if (TemplateDecl *OldTD = dyn_cast<TemplateDecl>(OldDecl)) {
OldDecl = OldTD->getTemplatedDecl();
IsTemplate = true;
if (!IsSpecialization)
IsDefinition = false;
}
if (TemplateDecl *NewTD = dyn_cast<TemplateDecl>(NewDecl)) {
NewDecl = NewTD->getTemplatedDecl();
IsTemplate = true;
}
if (!OldDecl || !NewDecl)
return;
const DLLImportAttr *OldImportAttr = OldDecl->getAttr<DLLImportAttr>();
const DLLExportAttr *OldExportAttr = OldDecl->getAttr<DLLExportAttr>();
const DLLImportAttr *NewImportAttr = NewDecl->getAttr<DLLImportAttr>();
const DLLExportAttr *NewExportAttr = NewDecl->getAttr<DLLExportAttr>();
// dllimport and dllexport are inheritable attributes so we have to exclude
// inherited attribute instances.
bool HasNewAttr = (NewImportAttr && !NewImportAttr->isInherited()) ||
(NewExportAttr && !NewExportAttr->isInherited());
// A redeclaration is not allowed to add a dllimport or dllexport attribute,
// the only exception being explicit specializations.
// Implicitly generated declarations are also excluded for now because there
// is no other way to switch these to use dllimport or dllexport.
bool AddsAttr = !(OldImportAttr || OldExportAttr) && HasNewAttr;
if (AddsAttr && !IsSpecialization && !OldDecl->isImplicit()) {
// Allow with a warning for free functions and global variables.
bool JustWarn = false;
if (!OldDecl->isCXXClassMember()) {
auto *VD = dyn_cast<VarDecl>(OldDecl);
if (VD && !VD->getDescribedVarTemplate())
JustWarn = true;
auto *FD = dyn_cast<FunctionDecl>(OldDecl);
if (FD && FD->getTemplatedKind() == FunctionDecl::TK_NonTemplate)
JustWarn = true;
}
// We cannot change a declaration that's been used because IR has already
// been emitted. Dllimported functions will still work though (modulo
// address equality) as they can use the thunk.
if (OldDecl->isUsed())
if (!isa<FunctionDecl>(OldDecl) || !NewImportAttr)
JustWarn = false;
unsigned DiagID = JustWarn ? diag::warn_attribute_dll_redeclaration
: diag::err_attribute_dll_redeclaration;
S.Diag(NewDecl->getLocation(), DiagID)
<< NewDecl
<< (NewImportAttr ? (const Attr *)NewImportAttr : NewExportAttr);
S.Diag(OldDecl->getLocation(), diag::note_previous_declaration);
if (!JustWarn) {
NewDecl->setInvalidDecl();
return;
}
}
// A redeclaration is not allowed to drop a dllimport attribute, the only
// exceptions being inline function definitions (except for function
// templates), local extern declarations, qualified friend declarations, or the
// special MSVC extension: in the last case, the declaration is treated as if
// it were marked dllexport.
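// Illustrative sketch of that MSVC-extension case (a minimal example; the
// exact warning text may differ):
//
//   __declspec(dllimport) int f();
//   int f() { return 0; }  // the definition drops dllimport; under the MSVC
//                           // extension it is treated as if marked dllexport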
bool IsInline = false, IsStaticDataMember = false, IsQualifiedFriend = false;
bool IsMicrosoftABI = S.Context.getTargetInfo().shouldDLLImportComdatSymbols();
if (const auto *VD = dyn_cast<VarDecl>(NewDecl)) {
// Ignore static data members because out-of-line definitions are diagnosed
// separately.
IsStaticDataMember = VD->isStaticDataMember();
IsDefinition = VD->isThisDeclarationADefinition(S.Context) !=
VarDecl::DeclarationOnly;
} else if (const auto *FD = dyn_cast<FunctionDecl>(NewDecl)) {
IsInline = FD->isInlined();
IsQualifiedFriend = FD->getQualifier() &&
FD->getFriendObjectKind() == Decl::FOK_Declared;
}
if (OldImportAttr && !HasNewAttr &&
(!IsInline || (IsMicrosoftABI && IsTemplate)) && !IsStaticDataMember &&
!NewDecl->isLocalExternDecl() && !IsQualifiedFriend) {
if (IsMicrosoftABI && IsDefinition) {
if (IsSpecialization) {
S.Diag(
NewDecl->getLocation(),
diag::err_attribute_dllimport_function_specialization_definition);
S.Diag(OldImportAttr->getLocation(), diag::note_attribute);
NewDecl->dropAttr<DLLImportAttr>();
} else {
S.Diag(NewDecl->getLocation(),
diag::warn_redeclaration_without_import_attribute)
<< NewDecl;
S.Diag(OldDecl->getLocation(), diag::note_previous_declaration);
NewDecl->dropAttr<DLLImportAttr>();
NewDecl->addAttr(DLLExportAttr::CreateImplicit(
S.Context, NewImportAttr->getRange()));
}
} else if (IsMicrosoftABI && IsSpecialization) {
assert(!IsDefinition);
// MSVC allows this. Keep the inherited attribute.
} else {
S.Diag(NewDecl->getLocation(),
diag::warn_redeclaration_without_attribute_prev_attribute_ignored)
<< NewDecl << OldImportAttr;
S.Diag(OldDecl->getLocation(), diag::note_previous_declaration);
S.Diag(OldImportAttr->getLocation(), diag::note_previous_attribute);
OldDecl->dropAttr<DLLImportAttr>();
NewDecl->dropAttr<DLLImportAttr>();
}
} else if (IsInline && OldImportAttr && !IsMicrosoftABI) {
// In MinGW, seeing a function declared inline drops the dllimport
// attribute.
OldDecl->dropAttr<DLLImportAttr>();
NewDecl->dropAttr<DLLImportAttr>();
S.Diag(NewDecl->getLocation(),
diag::warn_dllimport_dropped_from_inline_function)
<< NewDecl << OldImportAttr;
}
// A specialization of a class template member function is processed here
// since it's a redeclaration. If the parent class is dllexport, the
// specialization inherits that attribute. This doesn't happen automatically
// since the parent class isn't instantiated until later.
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(NewDecl)) {
if (MD->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization &&
!NewImportAttr && !NewExportAttr) {
if (const DLLExportAttr *ParentExportAttr =
MD->getParent()->getAttr<DLLExportAttr>()) {
DLLExportAttr *NewAttr = ParentExportAttr->clone(S.Context);
NewAttr->setInherited(true);
NewDecl->addAttr(NewAttr);
}
}
}
}
/// Given that we are within the definition of the given function,
/// will that definition behave like C99's 'inline', where the
/// definition is discarded except for optimization purposes?
static bool isFunctionDefinitionDiscarded(Sema &S, FunctionDecl *FD) {
// Try to avoid calling GetGVALinkageForFunction.
// All cases of this require the 'inline' keyword.
if (!FD->isInlined()) return false;
// This is only possible in C++ with the gnu_inline attribute.
if (S.getLangOpts().CPlusPlus && !FD->hasAttr<GNUInlineAttr>())
return false;
// Okay, go ahead and call the relatively-more-expensive function.
return S.Context.GetGVALinkageForFunction(FD) == GVA_AvailableExternally;
}
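// Illustrative sketch of a definition this predicate would treat as discarded,
// assuming plain C99 'inline' semantics and no 'extern' declaration elsewhere
// in the translation unit:
//
//   inline int twice(int x) { return 2 * x; }  // body used only for inlining;
//                                              // an external definition is
//                                              // assumed to exist elsewhere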
/// Determine whether a variable is extern "C" prior to attaching
/// an initializer. We can't just call isExternC() here, because that
/// will also compute and cache whether the declaration is externally
/// visible, which might change when we attach the initializer.
///
/// This can only be used if the declaration is known to not be a
/// redeclaration of an internal linkage declaration.
///
/// For instance:
///
/// auto x = []{};
///
/// Attaching the initializer here makes this declaration not externally
/// visible, because its type has internal linkage.
///
/// FIXME: This is a hack.
template<typename T>
static bool isIncompleteDeclExternC(Sema &S, const T *D) {
if (S.getLangOpts().CPlusPlus) {
// In C++, the overloadable attribute negates the effects of extern "C".
if (!D->isInExternCContext() || D->template hasAttr<OverloadableAttr>())
return false;
// So do CUDA's host/device attributes.
if (S.getLangOpts().CUDA && (D->template hasAttr<CUDADeviceAttr>() ||
D->template hasAttr<CUDAHostAttr>()))
return false;
}
return D->isExternC();
}
static bool shouldConsiderLinkage(const VarDecl *VD) {
const DeclContext *DC = VD->getDeclContext()->getRedeclContext();
if (DC->isFunctionOrMethod() || isa<OMPDeclareReductionDecl>(DC) ||
isa<OMPDeclareMapperDecl>(DC))
return VD->hasExternalStorage();
if (DC->isFileContext())
return true;
if (DC->isRecord())
return false;
if (DC->getDeclKind() == Decl::HLSLBuffer)
return false;
if (isa<RequiresExprBodyDecl>(DC))
return false;
llvm_unreachable("Unexpected context");
}
static bool shouldConsiderLinkage(const FunctionDecl *FD) {
const DeclContext *DC = FD->getDeclContext()->getRedeclContext();
if (DC->isFileContext() || DC->isFunctionOrMethod() ||
isa<OMPDeclareReductionDecl>(DC) || isa<OMPDeclareMapperDecl>(DC))
return true;
if (DC->isRecord())
return false;
llvm_unreachable("Unexpected context");
}
static bool hasParsedAttr(Scope *S, const Declarator &PD,
ParsedAttr::Kind Kind) {
// Check decl attributes on the DeclSpec.
if (PD.getDeclSpec().getAttributes().hasAttribute(Kind))
return true;
// Walk the declarator structure, checking decl attributes that were written in
// a type position but appertain to the decl itself.
for (unsigned I = 0, E = PD.getNumTypeObjects(); I != E; ++I) {
if (PD.getTypeObject(I).getAttrs().hasAttribute(Kind))
return true;
}
// Finally, check attributes on the decl itself.
return PD.getAttributes().hasAttribute(Kind) ||
PD.getDeclarationAttributes().hasAttribute(Kind);
}
/// Adjust the \c DeclContext for a function or variable that might be a
/// function-local external declaration.
bool Sema::adjustContextForLocalExternDecl(DeclContext *&DC) {
if (!DC->isFunctionOrMethod())
return false;
// If this is a local extern function or variable declared within a function
// template, don't add it into the enclosing namespace scope until it is
// instantiated; it might have a dependent type right now.
if (DC->isDependentContext())
return true;
// C++11 [basic.link]p7:
// When a block scope declaration of an entity with linkage is not found to
// refer to some other declaration, then that entity is a member of the
// innermost enclosing namespace.
//
// Per C++11 [namespace.def]p6, the innermost enclosing namespace is a
// semantically-enclosing namespace, not a lexically-enclosing one.
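// For illustration (a minimal sketch of the wording quoted above):
//
//   namespace N {
//     void g() {
//       extern int counter;  // declares N::counter; later uses of N::counter
//     }                       // refer to this entity
//   }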
while (!DC->isFileContext() && !isa<LinkageSpecDecl>(DC))
DC = DC->getParent();
return true;
}
/// Returns true if given declaration has external C language linkage.
static bool isDeclExternC(const Decl *D) {
if (const auto *FD = dyn_cast<FunctionDecl>(D))
return FD->isExternC();
if (const auto *VD = dyn_cast<VarDecl>(D))
return VD->isExternC();
llvm_unreachable("Unknown type of decl!");
}
/// Returns true if there hasn't been any invalid type diagnosed.
static bool diagnoseOpenCLTypes(Sema &Se, VarDecl *NewVD) {
DeclContext *DC = NewVD->getDeclContext();
QualType R = NewVD->getType();
// OpenCL v2.0 s6.9.b - Image type can only be used as a function argument.
// OpenCL v2.0 s6.13.16.1 - Pipe type can only be used as a function
// argument.
if (R->isImageType() || R->isPipeType()) {
Se.Diag(NewVD->getLocation(),
diag::err_opencl_type_can_only_be_used_as_function_parameter)
<< R;
NewVD->setInvalidDecl();
return false;
}
// OpenCL v1.2 s6.9.r:
// The event type cannot be used to declare a program scope variable.
// OpenCL v2.0 s6.9.q:
// The clk_event_t and reserve_id_t types cannot be declared in program
// scope.
if (NewVD->hasGlobalStorage() && !NewVD->isStaticLocal()) {
if (R->isReserveIDT() || R->isClkEventT() || R->isEventT()) {
Se.Diag(NewVD->getLocation(),
diag::err_invalid_type_for_program_scope_var)
<< R;
NewVD->setInvalidDecl();
return false;
}
}
// OpenCL v1.0 s6.8.a.3: Pointers to functions are not allowed.
if (!Se.getOpenCLOptions().isAvailableOption("__cl_clang_function_pointers",
Se.getLangOpts())) {
QualType NR = R.getCanonicalType();
while (NR->isPointerType() || NR->isMemberFunctionPointerType() ||
NR->isReferenceType()) {
if (NR->isFunctionPointerType() || NR->isMemberFunctionPointerType() ||
NR->isFunctionReferenceType()) {
Se.Diag(NewVD->getLocation(), diag::err_opencl_function_pointer)
<< NR->isReferenceType();
NewVD->setInvalidDecl();
return false;
}
NR = NR->getPointeeType();
}
}
if (!Se.getOpenCLOptions().isAvailableOption("cl_khr_fp16",
Se.getLangOpts())) {
// OpenCL v1.2 s6.1.1.1: reject declaring variables of the half and
// half array type (unless the cl_khr_fp16 extension is enabled).
if (Se.Context.getBaseElementType(R)->isHalfType()) {
Se.Diag(NewVD->getLocation(), diag::err_opencl_half_declaration) << R;
NewVD->setInvalidDecl();
return false;
}
}
// OpenCL v1.2 s6.9.r:
// The event type cannot be used with the __local, __constant and __global
// address space qualifiers.
if (R->isEventT()) {
if (R.getAddressSpace() != LangAS::opencl_private) {
Se.Diag(NewVD->getBeginLoc(), diag::err_event_t_addr_space_qual);
NewVD->setInvalidDecl();
return false;
}
}
if (R->isSamplerT()) {
// OpenCL v1.2 s6.9.b p4:
// The sampler type cannot be used with the __local and __global address
// space qualifiers.
if (R.getAddressSpace() == LangAS::opencl_local ||
R.getAddressSpace() == LangAS::opencl_global) {
Se.Diag(NewVD->getLocation(), diag::err_wrong_sampler_addressspace);
NewVD->setInvalidDecl();
}
// OpenCL v1.2 s6.12.14.1:
// A global sampler must be declared with either the constant address
// space qualifier or with the const qualifier.
if (DC->isTranslationUnit() &&
!(R.getAddressSpace() == LangAS::opencl_constant ||
R.isConstQualified())) {
Se.Diag(NewVD->getLocation(), diag::err_opencl_nonconst_global_sampler);
NewVD->setInvalidDecl();
}
if (NewVD->isInvalidDecl())
return false;
}
return true;
}
template <typename AttrTy>
static void copyAttrFromTypedefToDecl(Sema &S, Decl *D, const TypedefType *TT) {
const TypedefNameDecl *TND = TT->getDecl();
if (const auto *Attribute = TND->getAttr<AttrTy>()) {
AttrTy *Clone = Attribute->clone(S.Context);
Clone->setInherited(true);
D->addAttr(Clone);
}
}
// This function emits a warning and a corresponding note based on the
// ReadOnlyPlacementAttr attribute: all global variable declarations of an
// annotated type must be const qualified.
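// Illustrative sketch, assuming the __attribute__((enforce_read_only_placement))
// spelling for ReadOnlyPlacementAttr (exact diagnostic text may differ):
//
//   struct __attribute__((enforce_read_only_placement)) Table { int v; };
//   Table t;               // warning: should be declared const
//   const Table ct = {1};  // OK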
void emitReadOnlyPlacementAttrWarning(Sema &S, const VarDecl *VD) {
QualType VarType = VD->getType().getCanonicalType();
// Ignore local declarations (for now) and those with const qualification.
// TODO: Local variables should not be allowed if their type declaration has
// ReadOnlyPlacementAttr attribute. To be handled in follow-up patch.
if (!VD || VD->hasLocalStorage() || VD->getType().isConstQualified())
return;
if (VarType->isArrayType()) {
// Retrieve element type for array declarations.
VarType = S.getASTContext().getBaseElementType(VarType);
}
const RecordDecl *RD = VarType->getAsRecordDecl();
// Check if the record declaration is present and if it has any attributes.
if (RD == nullptr)
return;
if (const auto *ConstDecl = RD->getAttr<ReadOnlyPlacementAttr>()) {
S.Diag(VD->getLocation(), diag::warn_var_decl_not_read_only) << RD;
S.Diag(ConstDecl->getLocation(), diag::note_enforce_read_only_placement);
return;
}
}
NamedDecl *Sema::ActOnVariableDeclarator(
Scope *S, Declarator &D, DeclContext *DC, TypeSourceInfo *TInfo,
LookupResult &Previous, MultiTemplateParamsArg TemplateParamLists,
bool &AddToScope, ArrayRef<BindingDecl *> Bindings) {
QualType R = TInfo->getType();
DeclarationName Name = GetNameForDeclarator(D).getName();
IdentifierInfo *II = Name.getAsIdentifierInfo();
if (D.isDecompositionDeclarator()) {
// Take the name of the first declarator as our name for diagnostic
// purposes.
auto &Decomp = D.getDecompositionDeclarator();
if (!Decomp.bindings().empty()) {
II = Decomp.bindings()[0].Name;
Name = II;
}
} else if (!II) {
Diag(D.getIdentifierLoc(), diag::err_bad_variable_name) << Name;
return nullptr;
}
DeclSpec::SCS SCSpec = D.getDeclSpec().getStorageClassSpec();
StorageClass SC = StorageClassSpecToVarDeclStorageClass(D.getDeclSpec());
// dllimport globals without explicit storage class are treated as extern. We
// have to change the storage class this early to get the right DeclContext.
if (SC == SC_None && !DC->isRecord() &&
hasParsedAttr(S, D, ParsedAttr::AT_DLLImport) &&
!hasParsedAttr(S, D, ParsedAttr::AT_DLLExport))
SC = SC_Extern;
DeclContext *OriginalDC = DC;
bool IsLocalExternDecl = SC == SC_Extern &&
adjustContextForLocalExternDecl(DC);
if (SCSpec == DeclSpec::SCS_mutable) {
// mutable can only appear on non-static class members, so it's always
// an error here
Diag(D.getIdentifierLoc(), diag::err_mutable_nonmember);
D.setInvalidType();
SC = SC_None;
}
if (getLangOpts().CPlusPlus11 && SCSpec == DeclSpec::SCS_register &&
!D.getAsmLabel() && !getSourceManager().isInSystemMacro(
D.getDeclSpec().getStorageClassSpecLoc())) {
// In C++11, the 'register' storage class specifier is deprecated.
// Suppress the warning in system macros; it's used in macros in some
// popular C system headers, such as glibc's htonl() macro.
Diag(D.getDeclSpec().getStorageClassSpecLoc(),
getLangOpts().CPlusPlus17 ? diag::ext_register_storage_class
: diag::warn_deprecated_register)
<< FixItHint::CreateRemoval(D.getDeclSpec().getStorageClassSpecLoc());
}
DiagnoseFunctionSpecifiers(D.getDeclSpec());
if (!DC->isRecord() && S->getFnParent() == nullptr) {
// C99 6.9p2: The storage-class specifiers auto and register shall not
// appear in the declaration specifiers in an external declaration.
// Global Register+Asm is a GNU extension we support.
if (SC == SC_Auto || (SC == SC_Register && !D.getAsmLabel())) {
Diag(D.getIdentifierLoc(), diag::err_typecheck_sclass_fscope);
D.setInvalidType();
}
}
// If this variable has a VLA type and an initializer, try to
// fold to a constant-sized type. This is otherwise invalid.
if (D.hasInitializer() && R->isVariableArrayType())
tryToFixVariablyModifiedVarType(TInfo, R, D.getIdentifierLoc(),
/*DiagID=*/0);
bool IsMemberSpecialization = false;
bool IsVariableTemplateSpecialization = false;
bool IsPartialSpecialization = false;
bool IsVariableTemplate = false;
VarDecl *NewVD = nullptr;
VarTemplateDecl *NewTemplate = nullptr;
TemplateParameterList *TemplateParams = nullptr;
if (!getLangOpts().CPlusPlus) {
NewVD = VarDecl::Create(Context, DC, D.getBeginLoc(), D.getIdentifierLoc(),
II, R, TInfo, SC);
if (R->getContainedDeducedType())
ParsingInitForAutoVars.insert(NewVD);
if (D.isInvalidType())
NewVD->setInvalidDecl();
if (NewVD->getType().hasNonTrivialToPrimitiveDestructCUnion() &&
NewVD->hasLocalStorage())
checkNonTrivialCUnion(NewVD->getType(), NewVD->getLocation(),
NTCUC_AutoVar, NTCUK_Destruct);
} else {
bool Invalid = false;
if (DC->isRecord() && !CurContext->isRecord()) {
// This is an out-of-line definition of a static data member.
switch (SC) {
case SC_None:
break;
case SC_Static:
Diag(D.getDeclSpec().getStorageClassSpecLoc(),
diag::err_static_out_of_line)
<< FixItHint::CreateRemoval(D.getDeclSpec().getStorageClassSpecLoc());
break;
case SC_Auto:
case SC_Register:
case SC_Extern:
// [dcl.stc] p2: The auto or register specifiers shall be applied only
// to names of variables declared in a block or to function parameters.
// [dcl.stc] p6: The extern specifier cannot be used in the declaration
// of class members
Diag(D.getDeclSpec().getStorageClassSpecLoc(),
diag::err_storage_class_for_static_member)
<< FixItHint::CreateRemoval(D.getDeclSpec().getStorageClassSpecLoc());
break;
case SC_PrivateExtern:
llvm_unreachable("C storage class in c++!");
}
}
if (SC == SC_Static && CurContext->isRecord()) {
if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(DC)) {
// Walk up the enclosing DeclContexts to check for any that are
// incompatible with static data members.
const DeclContext *FunctionOrMethod = nullptr;
const CXXRecordDecl *AnonStruct = nullptr;
for (DeclContext *Ctxt = DC; Ctxt; Ctxt = Ctxt->getParent()) {
if (Ctxt->isFunctionOrMethod()) {
FunctionOrMethod = Ctxt;
break;
}
const CXXRecordDecl *ParentDecl = dyn_cast<CXXRecordDecl>(Ctxt);
if (ParentDecl && !ParentDecl->getDeclName()) {
AnonStruct = ParentDecl;
break;
}
}
if (FunctionOrMethod) {
// C++ [class.static.data]p5: A local class shall not have static data
// members.
Diag(D.getIdentifierLoc(),
diag::err_static_data_member_not_allowed_in_local_class)
<< Name << RD->getDeclName() << RD->getTagKind();
} else if (AnonStruct) {
// C++ [class.static.data]p4: Unnamed classes and classes contained
// directly or indirectly within unnamed classes shall not contain
// static data members.
Diag(D.getIdentifierLoc(),
diag::err_static_data_member_not_allowed_in_anon_struct)
<< Name << AnonStruct->getTagKind();
Invalid = true;
} else if (RD->isUnion()) {
// C++98 [class.union]p1: If a union contains a static data member,
// the program is ill-formed. C++11 drops this restriction.
Diag(D.getIdentifierLoc(),
getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_static_data_member_in_union
: diag::ext_static_data_member_in_union) << Name;
}
}
}
// Match up the template parameter lists with the scope specifier, then
// determine whether we have a template or a template specialization.
bool InvalidScope = false;
TemplateParams = MatchTemplateParametersToScopeSpecifier(
D.getDeclSpec().getBeginLoc(), D.getIdentifierLoc(),
D.getCXXScopeSpec(),
D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId
? D.getName().TemplateId
: nullptr,
TemplateParamLists,
/*never a friend*/ false, IsMemberSpecialization, InvalidScope);
Invalid |= InvalidScope;
if (TemplateParams) {
if (!TemplateParams->size() &&
D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId) {
// There is an extraneous 'template<>' for this variable. Complain
// about it, but allow the declaration of the variable.
Diag(TemplateParams->getTemplateLoc(),
diag::err_template_variable_noparams)
<< II
<< SourceRange(TemplateParams->getTemplateLoc(),
TemplateParams->getRAngleLoc());
TemplateParams = nullptr;
} else {
// Check that we can declare a template here.
if (CheckTemplateDeclScope(S, TemplateParams))
return nullptr;
if (D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId) {
// This is an explicit specialization or a partial specialization.
IsVariableTemplateSpecialization = true;
IsPartialSpecialization = TemplateParams->size() > 0;
} else { // if (TemplateParams->size() > 0)
// This is a template declaration.
IsVariableTemplate = true;
// Only C++1y supports variable templates (N3651).
Diag(D.getIdentifierLoc(),
getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_variable_template
: diag::ext_variable_template);
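// For illustration (a minimal sketch of such a declaration):
//
//   template <typename T>
//   constexpr T pi = T(3.141592653589793L);  // C++14 variable template;
//                                            // an extension before C++14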
}
}
} else {
// Check that we can declare a member specialization here.
if (!TemplateParamLists.empty() && IsMemberSpecialization &&
CheckTemplateDeclScope(S, TemplateParamLists.back()))
return nullptr;
assert((Invalid ||
D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId) &&
"should have a 'template<>' for this decl");
}
if (IsVariableTemplateSpecialization) {
SourceLocation TemplateKWLoc =
TemplateParamLists.size() > 0
? TemplateParamLists[0]->getTemplateLoc()
: SourceLocation();
DeclResult Res = ActOnVarTemplateSpecialization(
S, D, TInfo, TemplateKWLoc, TemplateParams, SC,
IsPartialSpecialization);
if (Res.isInvalid())
return nullptr;
NewVD = cast<VarDecl>(Res.get());
AddToScope = false;
} else if (D.isDecompositionDeclarator()) {
NewVD = DecompositionDecl::Create(Context, DC, D.getBeginLoc(),
D.getIdentifierLoc(), R, TInfo, SC,
Bindings);
} else
NewVD = VarDecl::Create(Context, DC, D.getBeginLoc(),
D.getIdentifierLoc(), II, R, TInfo, SC);
// If this is supposed to be a variable template, create it as such.
if (IsVariableTemplate) {
NewTemplate =
VarTemplateDecl::Create(Context, DC, D.getIdentifierLoc(), Name,
TemplateParams, NewVD);
NewVD->setDescribedVarTemplate(NewTemplate);
}
// If this decl has an auto type in need of deduction, make a note of the
// Decl so we can diagnose uses of it in its own initializer.
if (R->getContainedDeducedType())
ParsingInitForAutoVars.insert(NewVD);
if (D.isInvalidType() || Invalid) {
NewVD->setInvalidDecl();
if (NewTemplate)
NewTemplate->setInvalidDecl();
}
SetNestedNameSpecifier(*this, NewVD, D);
// If we have any template parameter lists that don't directly belong to
// the variable (matching the scope specifier), store them.
unsigned VDTemplateParamLists = TemplateParams ? 1 : 0;
if (TemplateParamLists.size() > VDTemplateParamLists)
NewVD->setTemplateParameterListsInfo(
Context, TemplateParamLists.drop_back(VDTemplateParamLists));
}
if (D.getDeclSpec().isInlineSpecified()) {
if (!getLangOpts().CPlusPlus) {
Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
<< 0;
} else if (CurContext->isFunctionOrMethod()) {
// 'inline' is not allowed on block scope variable declaration.
Diag(D.getDeclSpec().getInlineSpecLoc(),
diag::err_inline_declaration_block_scope) << Name
<< FixItHint::CreateRemoval(D.getDeclSpec().getInlineSpecLoc());
} else {
Diag(D.getDeclSpec().getInlineSpecLoc(),
getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_inline_variable
: diag::ext_inline_variable);
NewVD->setInlineSpecified();
}
}
// Set the lexical context. If the declarator has a C++ scope specifier, the
// lexical context will be different from the semantic context.
NewVD->setLexicalDeclContext(CurContext);
if (NewTemplate)
NewTemplate->setLexicalDeclContext(CurContext);
if (IsLocalExternDecl) {
if (D.isDecompositionDeclarator())
for (auto *B : Bindings)
B->setLocalExternDecl();
else
NewVD->setLocalExternDecl();
}
bool EmitTLSUnsupportedError = false;
if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec()) {
// C++11 [dcl.stc]p4:
// When thread_local is applied to a variable of block scope the
// storage-class-specifier static is implied if it does not appear
// explicitly.
// Core issue: 'static' is not implied if the variable is declared
// 'extern'.
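// For illustration (a minimal sketch of both cases):
//
//   void f() {
//     thread_local int counter = 0;         // 'static' is implied
//     extern thread_local int external_tls; // 'static' is not implied
//   }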
if (NewVD->hasLocalStorage() &&
(SCSpec != DeclSpec::SCS_unspecified ||
TSCS != DeclSpec::TSCS_thread_local ||
!DC->isFunctionOrMethod()))
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_thread_non_global)
<< DeclSpec::getSpecifierName(TSCS);
else if (!Context.getTargetInfo().isTLSSupported()) {
if (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice ||
getLangOpts().SYCLIsDevice) {
// Postpone error emission until we've collected attributes required to
// figure out whether it's a host or device variable and whether the
// error should be ignored.
EmitTLSUnsupportedError = true;
// We still need to mark the variable as TLS so it shows up in the AST with
// the proper storage class for other tools to use, even if we're not going
// to emit any code for it.
NewVD->setTSCSpec(TSCS);
} else
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_thread_unsupported);
} else
NewVD->setTSCSpec(TSCS);
}
switch (D.getDeclSpec().getConstexprSpecifier()) {
case ConstexprSpecKind::Unspecified:
break;
case ConstexprSpecKind::Consteval:
Diag(D.getDeclSpec().getConstexprSpecLoc(),
diag::err_constexpr_wrong_decl_kind)
<< static_cast<int>(D.getDeclSpec().getConstexprSpecifier());
[[fallthrough]];
case ConstexprSpecKind::Constexpr:
NewVD->setConstexpr(true);
// C++1z [dcl.spec.constexpr]p1:
// A static data member declared with the constexpr specifier is
// implicitly an inline variable.
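// For illustration (a minimal sketch):
//
//   struct S { static constexpr int N = 3; };  // C++17: N is implicitly an
//                                              // inline variable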
if (NewVD->isStaticDataMember() &&
(getLangOpts().CPlusPlus17 ||
Context.getTargetInfo().getCXXABI().isMicrosoft()))
NewVD->setImplicitlyInline();
break;
case ConstexprSpecKind::Constinit:
if (!NewVD->hasGlobalStorage())
Diag(D.getDeclSpec().getConstexprSpecLoc(),
diag::err_constinit_local_variable);
else
NewVD->addAttr(ConstInitAttr::Create(
Context, D.getDeclSpec().getConstexprSpecLoc(),
AttributeCommonInfo::AS_Keyword, ConstInitAttr::Keyword_constinit));
break;
}
// C99 6.7.4p3
// An inline definition of a function with external linkage shall
// not contain a definition of a modifiable object with static or
// thread storage duration...
// We only apply this when the function is required to be defined
// elsewhere, i.e. when the function is not 'extern inline'. Note
// that a local variable with thread storage duration still has to
// be marked 'static'. Also note that it's possible to get these
// semantics in C++ using __attribute__((gnu_inline)).
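// For illustration, a minimal C99 sketch of the diagnosed pattern:
//
//   inline int next_id(void) { static int n; return ++n; }  // 'n' is a
//                                                            // modifiable
//                                                            // static local in
//                                                            // an inline defn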
if (SC == SC_Static && S->getFnParent() != nullptr &&
!NewVD->getType().isConstQualified()) {
FunctionDecl *CurFD = getCurFunctionDecl();
if (CurFD && isFunctionDefinitionDiscarded(*this, CurFD)) {
Diag(D.getDeclSpec().getStorageClassSpecLoc(),
diag::warn_static_local_in_extern_inline);
MaybeSuggestAddingStaticToDecl(CurFD);
}
}
if (D.getDeclSpec().isModulePrivateSpecified()) {
if (IsVariableTemplateSpecialization)
Diag(NewVD->getLocation(), diag::err_module_private_specialization)
<< (IsPartialSpecialization ? 1 : 0)
<< FixItHint::CreateRemoval(
D.getDeclSpec().getModulePrivateSpecLoc());
else if (IsMemberSpecialization)
Diag(NewVD->getLocation(), diag::err_module_private_specialization)
<< 2
<< FixItHint::CreateRemoval(D.getDeclSpec().getModulePrivateSpecLoc());
else if (NewVD->hasLocalStorage())
Diag(NewVD->getLocation(), diag::err_module_private_local)
<< 0 << NewVD
<< SourceRange(D.getDeclSpec().getModulePrivateSpecLoc())
<< FixItHint::CreateRemoval(
D.getDeclSpec().getModulePrivateSpecLoc());
else {
NewVD->setModulePrivate();
if (NewTemplate)
NewTemplate->setModulePrivate();
for (auto *B : Bindings)
B->setModulePrivate();
}
}
if (getLangOpts().OpenCL) {
deduceOpenCLAddressSpace(NewVD);
DeclSpec::TSCS TSC = D.getDeclSpec().getThreadStorageClassSpec();
if (TSC != TSCS_unspecified) {
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_opencl_unknown_type_specifier)
<< getLangOpts().getOpenCLVersionString()
<< DeclSpec::getSpecifierName(TSC) << 1;
NewVD->setInvalidDecl();
}
}
// Handle attributes prior to checking for duplicates in MergeVarDecl
ProcessDeclAttributes(S, NewVD, D);
// FIXME: This is probably the wrong location to be doing this and we should
// probably be doing this for more attributes (especially for function
// pointer attributes such as format, warn_unused_result, etc.). Ideally
// the code to copy attributes would be generated by TableGen.
if (R->isFunctionPointerType())
if (const auto *TT = R->getAs<TypedefType>())
copyAttrFromTypedefToDecl<AllocSizeAttr>(*this, NewVD, TT);
if (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice ||
getLangOpts().SYCLIsDevice) {
if (EmitTLSUnsupportedError &&
((getLangOpts().CUDA && DeclAttrsMatchCUDAMode(getLangOpts(), NewVD)) ||
(getLangOpts().OpenMPIsDevice &&
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(NewVD))))
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_thread_unsupported);
if (EmitTLSUnsupportedError &&
(LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)))
targetDiag(D.getIdentifierLoc(), diag::err_thread_unsupported);
// CUDA B.2.5: "__shared__ and __constant__ variables have implied static
// storage [duration]."
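// For illustration (a minimal CUDA sketch):
//
//   __global__ void kern() {
//     __shared__ float tile[256];  // treated as if declared 'static'
//   }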
if (SC == SC_None && S->getFnParent() != nullptr &&
(NewVD->hasAttr<CUDASharedAttr>() ||
NewVD->hasAttr<CUDAConstantAttr>())) {
NewVD->setStorageClass(SC_Static);
}
}
// Ensure that dllimport globals without explicit storage class are treated as
// extern. The storage class is set above using parsed attributes. Now we can
// check the VarDecl itself.
assert(!NewVD->hasAttr<DLLImportAttr>() ||
NewVD->getAttr<DLLImportAttr>()->isInherited() ||
NewVD->isStaticDataMember() || NewVD->getStorageClass() != SC_None);
// In auto-retain/release, infer strong retention for variables of
// retainable type.
if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(NewVD))
NewVD->setInvalidDecl();
// Handle GNU asm-label extension (encoded as an attribute).
if (Expr *E = (Expr*)D.getAsmLabel()) {
// The parser guarantees this is a string.
StringLiteral *SE = cast<StringLiteral>(E);
StringRef Label = SE->getString();
if (S->getFnParent() != nullptr) {
switch (SC) {
case SC_None:
case SC_Auto:
Diag(E->getExprLoc(), diag::warn_asm_label_on_auto_decl) << Label;
break;
case SC_Register:
// Local Named register
if (!Context.getTargetInfo().isValidGCCRegisterName(Label) &&
DeclAttrsMatchCUDAMode(getLangOpts(), getCurFunctionDecl()))
Diag(E->getExprLoc(), diag::err_asm_unknown_register_name) << Label;
break;
case SC_Static:
case SC_Extern:
case SC_PrivateExtern:
break;
}
} else if (SC == SC_Register) {
// Global Named register
if (DeclAttrsMatchCUDAMode(getLangOpts(), NewVD)) {
const auto &TI = Context.getTargetInfo();
bool HasSizeMismatch;
if (!TI.isValidGCCRegisterName(Label))
Diag(E->getExprLoc(), diag::err_asm_unknown_register_name) << Label;
else if (!TI.validateGlobalRegisterVariable(Label,
Context.getTypeSize(R),
HasSizeMismatch))
Diag(E->getExprLoc(), diag::err_asm_invalid_global_var_reg) << Label;
else if (HasSizeMismatch)
Diag(E->getExprLoc(), diag::err_asm_register_size_mismatch) << Label;
}
if (!R->isIntegralType(Context) && !R->isPointerType()) {
Diag(D.getBeginLoc(), diag::err_asm_bad_register_type);
NewVD->setInvalidDecl(true);
}
}
NewVD->addAttr(AsmLabelAttr::Create(Context, Label,
/*IsLiteralLabel=*/true,
SE->getStrTokenLoc(0)));
} else if (!ExtnameUndeclaredIdentifiers.empty()) {
llvm::DenseMap<IdentifierInfo*,AsmLabelAttr*>::iterator I =
ExtnameUndeclaredIdentifiers.find(NewVD->getIdentifier());
if (I != ExtnameUndeclaredIdentifiers.end()) {
if (isDeclExternC(NewVD)) {
NewVD->addAttr(I->second);
ExtnameUndeclaredIdentifiers.erase(I);
} else
Diag(NewVD->getLocation(), diag::warn_redefine_extname_not_applied)
<< /*Variable*/1 << NewVD;
}
}
// Find the shadowed declaration before filtering for scope.
NamedDecl *ShadowedDecl = D.getCXXScopeSpec().isEmpty()
? getShadowedDeclaration(NewVD, Previous)
: nullptr;
// Don't consider existing declarations that are in a different
// scope and are out-of-semantic-context declarations (if the new
// declaration has linkage).
FilterLookupForScope(Previous, OriginalDC, S, shouldConsiderLinkage(NewVD),
D.getCXXScopeSpec().isNotEmpty() ||
IsMemberSpecialization ||
IsVariableTemplateSpecialization);
// Check whether the previous declaration is in the same block scope. This
// affects whether we merge types with it, per C++11 [dcl.array]p3.
if (getLangOpts().CPlusPlus &&
NewVD->isLocalVarDecl() && NewVD->hasExternalStorage())
NewVD->setPreviousDeclInSameBlockScope(
Previous.isSingleResult() && !Previous.isShadowed() &&
isDeclInScope(Previous.getFoundDecl(), OriginalDC, S, false));
if (!getLangOpts().CPlusPlus) {
D.setRedeclaration(CheckVariableDeclaration(NewVD, Previous));
} else {
// If this is an explicit specialization of a static data member, check it.
if (IsMemberSpecialization && !NewVD->isInvalidDecl() &&
CheckMemberSpecialization(NewVD, Previous))
NewVD->setInvalidDecl();
// Merge the decl with the existing one if appropriate.
if (!Previous.empty()) {
if (Previous.isSingleResult() &&
isa<FieldDecl>(Previous.getFoundDecl()) &&
D.getCXXScopeSpec().isSet()) {
// The user tried to define a non-static data member
// out-of-line (C++ [dcl.meaning]p1).
Diag(NewVD->getLocation(), diag::err_nonstatic_member_out_of_line)
<< D.getCXXScopeSpec().getRange();
Previous.clear();
NewVD->setInvalidDecl();
}
} else if (D.getCXXScopeSpec().isSet()) {
// No previous declaration in the qualifying scope.
Diag(D.getIdentifierLoc(), diag::err_no_member)
<< Name << computeDeclContext(D.getCXXScopeSpec(), true)
<< D.getCXXScopeSpec().getRange();
NewVD->setInvalidDecl();
}
if (!IsVariableTemplateSpecialization)
D.setRedeclaration(CheckVariableDeclaration(NewVD, Previous));
if (NewTemplate) {
VarTemplateDecl *PrevVarTemplate =
NewVD->getPreviousDecl()
? NewVD->getPreviousDecl()->getDescribedVarTemplate()
: nullptr;
// Check the template parameter list of this declaration, possibly
// merging in the template parameter list from the previous variable
// template declaration.
if (CheckTemplateParameterList(
TemplateParams,
PrevVarTemplate ? PrevVarTemplate->getTemplateParameters()
: nullptr,
(D.getCXXScopeSpec().isSet() && DC && DC->isRecord() &&
DC->isDependentContext())
? TPC_ClassTemplateMember
: TPC_VarTemplate))
NewVD->setInvalidDecl();
// If we are providing an explicit specialization of a static variable
// template, make a note of that.
if (PrevVarTemplate &&
PrevVarTemplate->getInstantiatedFromMemberTemplate())
PrevVarTemplate->setMemberSpecialization();
}
}
// Diagnose shadowed variables iff this isn't a redeclaration.
if (ShadowedDecl && !D.isRedeclaration())
CheckShadow(NewVD, ShadowedDecl, Previous);
ProcessPragmaWeak(S, NewVD);
// If this is the first declaration of an extern C variable, update
// the map of such variables.
if (NewVD->isFirstDecl() && !NewVD->isInvalidDecl() &&
isIncompleteDeclExternC(*this, NewVD))
RegisterLocallyScopedExternCDecl(NewVD, S);
if (getLangOpts().CPlusPlus && NewVD->isStaticLocal()) {
MangleNumberingContext *MCtx;
Decl *ManglingContextDecl;
std::tie(MCtx, ManglingContextDecl) =
getCurrentMangleNumberContext(NewVD->getDeclContext());
if (MCtx) {
Context.setManglingNumber(
NewVD, MCtx->getManglingNumber(
NewVD, getMSManglingNumber(getLangOpts(), S)));
Context.setStaticLocalNumber(NewVD, MCtx->getStaticLocalNumber(NewVD));
}
}
// Special handling of variable named 'main'.
if (Name.getAsIdentifierInfo() && Name.getAsIdentifierInfo()->isStr("main") &&
NewVD->getDeclContext()->getRedeclContext()->isTranslationUnit() &&
!getLangOpts().Freestanding && !NewVD->getDescribedVarTemplate()) {
// C++ [basic.start.main]p3
// A program that declares a variable main at global scope is ill-formed.
if (getLangOpts().CPlusPlus)
Diag(D.getBeginLoc(), diag::err_main_global_variable);
// In C, an external-linkage variable named main results in undefined
// behavior.
else if (NewVD->hasExternalFormalLinkage())
Diag(D.getBeginLoc(), diag::warn_main_redefined);
}
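// For illustration (a minimal sketch of the 'main' variable cases above):
//
//   int main;  // C++: error (ill-formed); C with external linkage: warning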
if (D.isRedeclaration() && !Previous.empty()) {
NamedDecl *Prev = Previous.getRepresentativeDecl();
checkDLLAttributeRedeclaration(*this, Prev, NewVD, IsMemberSpecialization,
D.isFunctionDefinition());
}
if (NewTemplate) {
if (NewVD->isInvalidDecl())
NewTemplate->setInvalidDecl();
ActOnDocumentableDecl(NewTemplate);
return NewTemplate;
}
if (IsMemberSpecialization && !NewVD->isInvalidDecl())
CompleteMemberSpecialization(NewVD, Previous);
emitReadOnlyPlacementAttrWarning(*this, NewVD);
return NewVD;
}
/// Enum describing the %select options in diag::warn_decl_shadow.
enum ShadowedDeclKind {
SDK_Local,
SDK_Global,
SDK_StaticMember,
SDK_Field,
SDK_Typedef,
SDK_Using,
SDK_StructuredBinding
};
/// Determine what kind of declaration we're shadowing.
static ShadowedDeclKind computeShadowedDeclKind(const NamedDecl *ShadowedDecl,
const DeclContext *OldDC) {
if (isa<TypeAliasDecl>(ShadowedDecl))
return SDK_Using;
else if (isa<TypedefDecl>(ShadowedDecl))
return SDK_Typedef;
else if (isa<BindingDecl>(ShadowedDecl))
return SDK_StructuredBinding;
else if (isa<RecordDecl>(OldDC))
return isa<FieldDecl>(ShadowedDecl) ? SDK_Field : SDK_StaticMember;
return OldDC->isFileContext() ? SDK_Global : SDK_Local;
}
/// Return the location of the capture if the given lambda captures the given
/// variable \p VD, or an invalid source location otherwise.
static SourceLocation getCaptureLocation(const LambdaScopeInfo *LSI,
const VarDecl *VD) {
for (const Capture &Capture : LSI->Captures) {
if (Capture.isVariableCapture() && Capture.getVariable() == VD)
return Capture.getLocation();
}
return SourceLocation();
}
static bool shouldWarnIfShadowedDecl(const DiagnosticsEngine &Diags,
const LookupResult &R) {
// Only diagnose if we're shadowing an unambiguous field or variable.
if (R.getResultKind() != LookupResult::Found)
return false;
// Return false if warning is ignored.
return !Diags.isIgnored(diag::warn_decl_shadow, R.getNameLoc());
}
/// Return the declaration shadowed by the given variable \p D, or null
/// if it doesn't shadow any declaration or shadowing warnings are disabled.
NamedDecl *Sema::getShadowedDeclaration(const VarDecl *D,
const LookupResult &R) {
if (!shouldWarnIfShadowedDecl(Diags, R))
return nullptr;
// Don't diagnose declarations at file scope.
if (D->hasGlobalStorage())
return nullptr;
NamedDecl *ShadowedDecl = R.getFoundDecl();
return isa<VarDecl, FieldDecl, BindingDecl>(ShadowedDecl) ? ShadowedDecl
: nullptr;
}
/// Return the declaration shadowed by the given typedef \p D, or null
/// if it doesn't shadow any declaration or shadowing warnings are disabled.
NamedDecl *Sema::getShadowedDeclaration(const TypedefNameDecl *D,
const LookupResult &R) {
// Don't warn if typedef declaration is part of a class
if (D->getDeclContext()->isRecord())
return nullptr;
if (!shouldWarnIfShadowedDecl(Diags, R))
return nullptr;
NamedDecl *ShadowedDecl = R.getFoundDecl();
return isa<TypedefNameDecl>(ShadowedDecl) ? ShadowedDecl : nullptr;
}
/// Return the declaration shadowed by the given variable \p D, or null
/// if it doesn't shadow any declaration or shadowing warnings are disabled.
NamedDecl *Sema::getShadowedDeclaration(const BindingDecl *D,
const LookupResult &R) {
if (!shouldWarnIfShadowedDecl(Diags, R))
return nullptr;
NamedDecl *ShadowedDecl = R.getFoundDecl();
return isa<VarDecl, FieldDecl, BindingDecl>(ShadowedDecl) ? ShadowedDecl
: nullptr;
}
/// Diagnose variable or built-in function shadowing. Implements
/// -Wshadow.
///
/// This method is called whenever a VarDecl is added to a "useful"
/// scope.
///
/// \param ShadowedDecl the declaration that is shadowed by the given variable
/// \param R the lookup of the name
///
void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl,
const LookupResult &R) {
DeclContext *NewDC = D->getDeclContext();
if (FieldDecl *FD = dyn_cast<FieldDecl>(ShadowedDecl)) {
// Fields are not shadowed by variables in C++ static methods.
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(NewDC))
if (MD->isStatic())
return;
// Fields shadowed by constructor parameters are a special case. Usually
// the constructor initializes the field with the parameter.
if (isa<CXXConstructorDecl>(NewDC))
if (const auto PVD = dyn_cast<ParmVarDecl>(D)) {
// Remember that this was shadowed so we can either warn about its
// modification or its existence depending on warning settings.
ShadowingDecls.insert({PVD->getCanonicalDecl(), FD});
return;
}
}
if (VarDecl *shadowedVar = dyn_cast<VarDecl>(ShadowedDecl))
if (shadowedVar->isExternC()) {
// For shadowing external vars, make sure that we point to the global
// declaration, not a locally scoped extern declaration.
for (auto *I : shadowedVar->redecls())
if (I->isFileVarDecl()) {
ShadowedDecl = I;
break;
}
}
DeclContext *OldDC = ShadowedDecl->getDeclContext()->getRedeclContext();
unsigned WarningDiag = diag::warn_decl_shadow;
SourceLocation CaptureLoc;
if (isa<VarDecl>(D) && isa<VarDecl>(ShadowedDecl) && NewDC &&
isa<CXXMethodDecl>(NewDC)) {
if (const auto *RD = dyn_cast<CXXRecordDecl>(NewDC->getParent())) {
if (RD->isLambda() && OldDC->Encloses(NewDC->getLexicalParent())) {
if (RD->getLambdaCaptureDefault() == LCD_None) {
// Try to avoid warnings for lambdas with an explicit capture list.
const auto *LSI = cast<LambdaScopeInfo>(getCurFunction());
// Warn only when the lambda captures the shadowed decl explicitly.
CaptureLoc = getCaptureLocation(LSI, cast<VarDecl>(ShadowedDecl));
if (CaptureLoc.isInvalid())
WarningDiag = diag::warn_decl_shadow_uncaptured_local;
} else {
// Remember that this was shadowed so we can avoid the warning if the
// shadowed decl isn't captured and the warning settings allow it.
cast<LambdaScopeInfo>(getCurFunction())
->ShadowingDecls.push_back(
{cast<VarDecl>(D), cast<VarDecl>(ShadowedDecl)});
return;
}
}
if (cast<VarDecl>(ShadowedDecl)->hasLocalStorage()) {
// A variable can't shadow a local variable in an enclosing scope if
// they are separated by a non-capturing declaration context.
for (DeclContext *ParentDC = NewDC;
ParentDC && !ParentDC->Equals(OldDC);
ParentDC = getLambdaAwareParentOfDeclContext(ParentDC)) {
// Only block literals, captured statements, and lambda expressions
// can capture; other scopes don't.
if (!isa<BlockDecl>(ParentDC) && !isa<CapturedDecl>(ParentDC) &&
!isLambdaCallOperator(ParentDC)) {
return;
}
}
}
}
}
// Only warn about certain kinds of shadowing for class members.
if (NewDC && NewDC->isRecord()) {
// In particular, don't warn about shadowing non-class members.
if (!OldDC->isRecord())
return;
// TODO: should we warn about static data members shadowing
// static data members from base classes?
// TODO: don't diagnose for inaccessible shadowed members.
// This is hard to do perfectly because we might friend the
// shadowing context, but that's just a false negative.
}
DeclarationName Name = R.getLookupName();
// Emit warning and note.
ShadowedDeclKind Kind = computeShadowedDeclKind(ShadowedDecl, OldDC);
Diag(R.getNameLoc(), WarningDiag) << Name << Kind << OldDC;
if (!CaptureLoc.isInvalid())
Diag(CaptureLoc, diag::note_var_explicitly_captured_here)
<< Name << /*explicitly*/ 1;
Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration);
}
/// Diagnose shadowing for variables shadowed in the lambda record \p LambdaRD
/// when these variables are captured by the lambda.
void Sema::DiagnoseShadowingLambdaDecls(const LambdaScopeInfo *LSI) {
for (const auto &Shadow : LSI->ShadowingDecls) {
const VarDecl *ShadowedDecl = Shadow.ShadowedDecl;
// Try to avoid the warning when the shadowed decl isn't captured.
SourceLocation CaptureLoc = getCaptureLocation(LSI, ShadowedDecl);
const DeclContext *OldDC = ShadowedDecl->getDeclContext();
Diag(Shadow.VD->getLocation(), CaptureLoc.isInvalid()
? diag::warn_decl_shadow_uncaptured_local
: diag::warn_decl_shadow)
<< Shadow.VD->getDeclName()
<< computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC;
if (!CaptureLoc.isInvalid())
Diag(CaptureLoc, diag::note_var_explicitly_captured_here)
<< Shadow.VD->getDeclName() << /*explicitly*/ 0;
Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration);
}
}
/// Check -Wshadow without the advantage of a previous lookup.
void Sema::CheckShadow(Scope *S, VarDecl *D) {
if (Diags.isIgnored(diag::warn_decl_shadow, D->getLocation()))
return;
LookupResult R(*this, D->getDeclName(), D->getLocation(),
Sema::LookupOrdinaryName, Sema::ForVisibleRedeclaration);
LookupName(R, S);
if (NamedDecl *ShadowedDecl = getShadowedDeclaration(D, R))
CheckShadow(D, ShadowedDecl, R);
}
/// Check if 'E', which is an expression that is about to be modified, refers
/// to a constructor parameter that shadows a field.
void Sema::CheckShadowingDeclModification(Expr *E, SourceLocation Loc) {
// Quickly ignore expressions that can't be shadowing ctor parameters.
if (!getLangOpts().CPlusPlus || ShadowingDecls.empty())
return;
E = E->IgnoreParenImpCasts();
auto *DRE = dyn_cast<DeclRefExpr>(E);
if (!DRE)
return;
const NamedDecl *D = cast<NamedDecl>(DRE->getDecl()->getCanonicalDecl());
auto I = ShadowingDecls.find(D);
if (I == ShadowingDecls.end())
return;
const NamedDecl *ShadowedDecl = I->second;
const DeclContext *OldDC = ShadowedDecl->getDeclContext();
Diag(Loc, diag::warn_modifying_shadowing_decl) << D << OldDC;
Diag(D->getLocation(), diag::note_var_declared_here) << D;
Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration);
// Avoid issuing multiple warnings about the same decl.
ShadowingDecls.erase(I);
}
/// Check for conflict between this global or extern "C" declaration and
/// previous global or extern "C" declarations. This is only used in C++.
template<typename T>
static bool checkGlobalOrExternCConflict(
Sema &S, const T *ND, bool IsGlobal, LookupResult &Previous) {
assert(S.getLangOpts().CPlusPlus && "only C++ has extern \"C\"");
NamedDecl *Prev = S.findLocallyScopedExternCDecl(ND->getDeclName());
if (!Prev && IsGlobal && !isIncompleteDeclExternC(S, ND)) {
// The common case: this global doesn't conflict with any extern "C"
// declaration.
return false;
}
if (Prev) {
if (!IsGlobal || isIncompleteDeclExternC(S, ND)) {
// Both the old and new declarations have C language linkage. This is a
// redeclaration.
Previous.clear();
Previous.addDecl(Prev);
return true;
}
// This is a global, non-extern "C" declaration, and there is a previous
// non-global extern "C" declaration. Diagnose if this is a variable
// declaration.
if (!isa<VarDecl>(ND))
return false;
} else {
// The declaration is extern "C". Check for any declaration in the
// translation unit which might conflict.
if (IsGlobal) {
// We have already performed the lookup into the translation unit.
IsGlobal = false;
for (LookupResult::iterator I = Previous.begin(), E = Previous.end();
I != E; ++I) {
if (isa<VarDecl>(*I)) {
Prev = *I;
break;
}
}
} else {
DeclContext::lookup_result R =
S.Context.getTranslationUnitDecl()->lookup(ND->getDeclName());
for (DeclContext::lookup_result::iterator I = R.begin(), E = R.end();
I != E; ++I) {
if (isa<VarDecl>(*I)) {
Prev = *I;
break;
}
// FIXME: If we have any other entity with this name in global scope,
// the declaration is ill-formed, but that is a defect: it breaks the
// 'stat' hack, for instance. Only variables can have mangled name
// clashes with extern "C" declarations, so only they deserve a
// diagnostic.
}
}
if (!Prev)
return false;
}
// Use the first declaration's location to ensure we point at something which
// is lexically inside an extern "C" linkage-spec.
assert(Prev && "should have found a previous declaration to diagnose");
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Prev))
Prev = FD->getFirstDecl();
else
Prev = cast<VarDecl>(Prev)->getFirstDecl();
S.Diag(ND->getLocation(), diag::err_extern_c_global_conflict)
<< IsGlobal << ND;
S.Diag(Prev->getLocation(), diag::note_extern_c_global_conflict)
<< IsGlobal;
return false;
}
/// Apply special rules for handling extern "C" declarations. Returns \c true
/// if we have found that this is a redeclaration of some prior entity.
///
/// Per C++ [dcl.link]p6:
/// Two declarations [for a function or variable] with C language linkage
/// with the same name that appear in different scopes refer to the same
/// [entity]. An entity with C language linkage shall not be declared with
/// the same name as an entity in global scope.
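///
/// For illustration, one minimal sketch of a conflict under that rule:
///
///   int buf;                            // entity in global scope
///   namespace N { extern "C" int buf; } // C-linkage entity with the same
///                                       // name: diagnosed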
template<typename T>
static bool checkForConflictWithNonVisibleExternC(Sema &S, const T *ND,
LookupResult &Previous) {
if (!S.getLangOpts().CPlusPlus) {
// In C, when declaring a global variable, look for a corresponding 'extern'
// variable declared in function scope. We don't need this in C++, because
// we find local extern decls in the surrounding file-scope DeclContext.
if (ND->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
if (NamedDecl *Prev = S.findLocallyScopedExternCDecl(ND->getDeclName())) {
Previous.clear();
Previous.addDecl(Prev);
return true;
}
}
return false;
}
// A declaration in the translation unit can conflict with an extern "C"
// declaration.
if (ND->getDeclContext()->getRedeclContext()->isTranslationUnit())
return checkGlobalOrExternCConflict(S, ND, /*IsGlobal*/true, Previous);
// An extern "C" declaration can conflict with a declaration in the
// translation unit or can be a redeclaration of an extern "C" declaration
// in another scope.
if (isIncompleteDeclExternC(S,ND))
return checkGlobalOrExternCConflict(S, ND, /*IsGlobal*/false, Previous);
// Neither global nor extern "C": nothing to do.
return false;
}
void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
// If the decl is already known invalid, don't check it.
if (NewVD->isInvalidDecl())
return;
QualType T = NewVD->getType();
// Defer checking an 'auto' type until its initializer is attached.
if (T->isUndeducedType())
return;
if (NewVD->hasAttrs())
CheckAlignasUnderalignment(NewVD);
if (T->isObjCObjectType()) {
Diag(NewVD->getLocation(), diag::err_statically_allocated_object)
<< FixItHint::CreateInsertion(NewVD->getLocation(), "*");
T = Context.getObjCObjectPointerType(T);
NewVD->setType(T);
}
// Emit an error if an address space was applied to decl with local storage.
// This includes arrays of objects with address space qualifiers, but not
// automatic variables that point to other address spaces.
// ISO/IEC TR 18037 S5.1.2
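// For illustration (a minimal non-OpenCL sketch):
//
//   void f() {
//     __attribute__((address_space(3))) int x;  // error: address space on an
//   }                                            // automatic variable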
if (!getLangOpts().OpenCL && NewVD->hasLocalStorage() &&
T.getAddressSpace() != LangAS::Default) {
Diag(NewVD->getLocation(), diag::err_as_qualified_auto_decl) << 0;
NewVD->setInvalidDecl();
return;
}
// OpenCL v1.2 s6.8 - The static qualifier is valid only in program
// scope.
if (getLangOpts().OpenCLVersion == 120 &&
!getOpenCLOptions().isAvailableOption("cl_clang_storage_class_specifiers",
getLangOpts()) &&
NewVD->isStaticLocal()) {
Diag(NewVD->getLocation(), diag::err_static_function_scope);
NewVD->setInvalidDecl();
return;
}
if (getLangOpts().OpenCL) {
if (!diagnoseOpenCLTypes(*this, NewVD))
return;
// OpenCL v2.0 s6.12.5 - The __block storage type is not supported.
if (NewVD->hasAttr<BlocksAttr>()) {
Diag(NewVD->getLocation(), diag::err_opencl_block_storage_type);
return;
}
if (T->isBlockPointerType()) {
// OpenCL v2.0 s6.12.5 - Any block declaration must be const qualified and
// can't use 'extern' storage class.
if (!T.isConstQualified()) {
Diag(NewVD->getLocation(), diag::err_opencl_invalid_block_declaration)
<< 0 /*const*/;
NewVD->setInvalidDecl();
return;
}
if (NewVD->hasExternalStorage()) {
Diag(NewVD->getLocation(), diag::err_opencl_extern_block_declaration);
NewVD->setInvalidDecl();
return;
}
}
// FIXME: Adding local AS in C++ for OpenCL might make sense.
if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() ||
NewVD->hasExternalStorage()) {
if (!T->isSamplerT() && !T->isDependentType() &&
!(T.getAddressSpace() == LangAS::opencl_constant ||
(T.getAddressSpace() == LangAS::opencl_global &&
getOpenCLOptions().areProgramScopeVariablesSupported(
getLangOpts())))) {
int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1;
if (getOpenCLOptions().areProgramScopeVariablesSupported(getLangOpts()))
Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space)
<< Scope << "global or constant";
else
Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space)
<< Scope << "constant";
NewVD->setInvalidDecl();
return;
}
} else {
if (T.getAddressSpace() == LangAS::opencl_global) {
Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
<< 1 /*is any function*/ << "global";
NewVD->setInvalidDecl();
return;
}
if (T.getAddressSpace() == LangAS::opencl_constant ||
T.getAddressSpace() == LangAS::opencl_local) {
FunctionDecl *FD = getCurFunctionDecl();
// OpenCL v1.1 s6.5.2 and s6.5.3: no local or constant variables
// in functions.
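// For illustration (a minimal OpenCL sketch):
//
//   void helper() { __local int scratch; }       // error: non-kernel function
//   __kernel void k() { __local int tile[64]; }  // OK at the outermost scope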
if (FD && !FD->hasAttr<OpenCLKernelAttr>()) {
if (T.getAddressSpace() == LangAS::opencl_constant)
Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
<< 0 /*non-kernel only*/ << "constant";
else
Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
<< 0 /*non-kernel only*/ << "local";
NewVD->setInvalidDecl();
return;
}
// OpenCL v2.0 s6.5.2 and s6.5.3: local and constant variables must be
// in the outermost scope of a kernel function.
if (FD && FD->hasAttr<OpenCLKernelAttr>()) {
if (!getCurScope()->isFunctionScope()) {
if (T.getAddressSpace() == LangAS::opencl_constant)
Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope)
<< "constant";
else
Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope)
<< "local";
NewVD->setInvalidDecl();
return;
}
}
} else if (T.getAddressSpace() != LangAS::opencl_private &&
// If we are parsing a template, we haven't deduced an address
// space yet.
T.getAddressSpace() != LangAS::Default) {
// Do not allow other address spaces on automatic variable.
Diag(NewVD->getLocation(), diag::err_as_qualified_auto_decl) << 1;
NewVD->setInvalidDecl();
return;
}
}
}
if (NewVD->hasLocalStorage() && T.isObjCGCWeak()
&& !NewVD->hasAttr<BlocksAttr>()) {
if (getLangOpts().getGC() != LangOptions::NonGC)
Diag(NewVD->getLocation(), diag::warn_gc_attribute_weak_on_local);
else {
assert(!getLangOpts().ObjCAutoRefCount);
Diag(NewVD->getLocation(), diag::warn_attribute_weak_on_local);
}
}
bool isVM = T->isVariablyModifiedType();
if (isVM || NewVD->hasAttr<CleanupAttr>() ||
NewVD->hasAttr<BlocksAttr>())
setFunctionHasBranchProtectedScope();
if ((isVM && NewVD->hasLinkage()) ||
(T->isVariableArrayType() && NewVD->hasGlobalStorage())) {
bool SizeIsNegative;
llvm::APSInt Oversized;
TypeSourceInfo *FixedTInfo = TryToFixInvalidVariablyModifiedTypeSourceInfo(
NewVD->getTypeSourceInfo(), Context, SizeIsNegative, Oversized);
QualType FixedT;
if (FixedTInfo && T == NewVD->getTypeSourceInfo()->getType())
FixedT = FixedTInfo->getType();
else if (FixedTInfo) {
// Type and type-as-written are canonically different. We need to fix up
// both types separately.
FixedT = TryToFixInvalidVariablyModifiedType(T, Context, SizeIsNegative,
Oversized);
}
if ((!FixedTInfo || FixedT.isNull()) && T->isVariableArrayType()) {
const VariableArrayType *VAT = Context.getAsVariableArrayType(T);
// FIXME: This won't give the correct result for
// int a[10][n];
SourceRange SizeRange = VAT->getSizeExpr()->getSourceRange();
if (NewVD->isFileVarDecl())
Diag(NewVD->getLocation(), diag::err_vla_decl_in_file_scope)
<< SizeRange;
else if (NewVD->isStaticLocal())
Diag(NewVD->getLocation(), diag::err_vla_decl_has_static_storage)
<< SizeRange;
else
Diag(NewVD->getLocation(), diag::err_vla_decl_has_extern_linkage)
<< SizeRange;
NewVD->setInvalidDecl();
return;
}
if (!FixedTInfo) {
if (NewVD->isFileVarDecl())
Diag(NewVD->getLocation(), diag::err_vm_decl_in_file_scope);
else
Diag(NewVD->getLocation(), diag::err_vm_decl_has_extern_linkage);
NewVD->setInvalidDecl();
return;
}
Diag(NewVD->getLocation(), diag::ext_vla_folded_to_constant);
NewVD->setType(FixedT);
NewVD->setTypeSourceInfo(FixedTInfo);
}
if (T->isVoidType()) {
// C++98 [dcl.stc]p5: The extern specifier can be applied only to the names
// of objects and functions.
if (NewVD->isThisDeclarationADefinition() || getLangOpts().CPlusPlus) {
Diag(NewVD->getLocation(), diag::err_typecheck_decl_incomplete_type)
<< T;
NewVD->setInvalidDecl();
return;
}
}
if (!NewVD->hasLocalStorage() && NewVD->hasAttr<BlocksAttr>()) {
Diag(NewVD->getLocation(), diag::err_block_on_nonlocal);
NewVD->setInvalidDecl();
return;
}
if (!NewVD->hasLocalStorage() && T->isSizelessType()) {
Diag(NewVD->getLocation(), diag::err_sizeless_nonlocal) << T;
NewVD->setInvalidDecl();
return;
}
if (isVM && NewVD->hasAttr<BlocksAttr>()) {
Diag(NewVD->getLocation(), diag::err_block_on_vm);
NewVD->setInvalidDecl();
return;
}
if (NewVD->isConstexpr() && !T->isDependentType() &&
RequireLiteralType(NewVD->getLocation(), T,
diag::err_constexpr_var_non_literal)) {
NewVD->setInvalidDecl();
return;
}
// PPC MMA non-pointer types are not allowed as non-local variable types.
if (Context.getTargetInfo().getTriple().isPPC64() &&
!NewVD->isLocalVarDecl() &&
CheckPPCMMAType(T, NewVD->getLocation())) {
NewVD->setInvalidDecl();
return;
}
// Check that SVE types are only used in functions with SVE available.
- if (T->isSVESizelessBuiltinType() && CurContext->isFunctionOrMethod()) {
+ if (T->isSVESizelessBuiltinType() && isa<FunctionDecl>(CurContext)) {
const FunctionDecl *FD = cast<FunctionDecl>(CurContext);
llvm::StringMap<bool> CallerFeatureMap;
Context.getFunctionFeatureMap(CallerFeatureMap, FD);
if (!Builtin::evaluateRequiredTargetFeatures(
"sve", CallerFeatureMap)) {
Diag(NewVD->getLocation(), diag::err_sve_vector_in_non_sve_target) << T;
NewVD->setInvalidDecl();
return;
}
}
}
/// Perform semantic checking on a newly-created variable
/// declaration.
///
/// This routine performs all of the type-checking required for a
/// variable declaration once it has been built. It is used both to
/// check variables after they have been parsed and their declarators
/// have been translated into a declaration, and to check variables
/// that have been instantiated from a template.
///
/// Sets NewVD->isInvalidDecl() if an error was encountered.
///
/// Returns true if the variable declaration is a redeclaration.
bool Sema::CheckVariableDeclaration(VarDecl *NewVD, LookupResult &Previous) {
CheckVariableDeclarationType(NewVD);
// If the decl is already known invalid, don't check it.
if (NewVD->isInvalidDecl())
return false;
// If we did not find anything by this name, look for a non-visible
// extern "C" declaration with the same name.
if (Previous.empty() &&
checkForConflictWithNonVisibleExternC(*this, NewVD, Previous))
Previous.setShadowed();
if (!Previous.empty()) {
MergeVarDecl(NewVD, Previous);
return true;
}
return false;
}
/// AddOverriddenMethods - See if a method overrides any in the base classes,
/// and if so, check that it's a valid override and remember it.
bool Sema::AddOverriddenMethods(CXXRecordDecl *DC, CXXMethodDecl *MD) {
llvm::SmallPtrSet<const CXXMethodDecl*, 4> Overridden;
// Look for methods in base classes that this method might override.
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/false,
/*DetectVirtual=*/false);
auto VisitBase = [&] (const CXXBaseSpecifier *Specifier, CXXBasePath &Path) {
CXXRecordDecl *BaseRecord = Specifier->getType()->getAsCXXRecordDecl();
DeclarationName Name = MD->getDeclName();
if (Name.getNameKind() == DeclarationName::CXXDestructorName) {
// We really want to find the base class destructor here.
QualType T = Context.getTypeDeclType(BaseRecord);
CanQualType CT = Context.getCanonicalType(T);
Name = Context.DeclarationNames.getCXXDestructorName(CT);
}
for (NamedDecl *BaseND : BaseRecord->lookup(Name)) {
CXXMethodDecl *BaseMD =
dyn_cast<CXXMethodDecl>(BaseND->getCanonicalDecl());
if (!BaseMD || !BaseMD->isVirtual() ||
IsOverload(MD, BaseMD, /*UseMemberUsingDeclRules=*/false,
/*ConsiderCudaAttrs=*/true,
// C++2a [class.virtual]p2 does not consider requires
// clauses when overriding.
/*ConsiderRequiresClauses=*/false))
continue;
if (Overridden.insert(BaseMD).second) {
MD->addOverriddenMethod(BaseMD);
CheckOverridingFunctionReturnType(MD, BaseMD);
CheckOverridingFunctionAttributes(MD, BaseMD);
CheckOverridingFunctionExceptionSpec(MD, BaseMD);
CheckIfOverriddenFunctionIsMarkedFinal(MD, BaseMD);
}
// A method can only override one function from each base class. We
// don't track indirectly overridden methods from bases of bases.
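// Illustrative example (hypothetical types, restating the comment above):
//   struct A { virtual void f(); };
//   struct B : A { void f() override; };
//   struct C : B { void f() override; };
// Here C::f records B::f as overridden, but not A::f directly.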
return true;
}
return false;
};
DC->lookupInBases(VisitBase, Paths);
return !Overridden.empty();
}
namespace {
// Struct for holding all of the extra arguments needed by
// DiagnoseInvalidRedeclaration to call Sema::ActOnFunctionDeclarator.
struct ActOnFDArgs {
Scope *S;
Declarator &D;
MultiTemplateParamsArg TemplateParamLists;
bool AddToScope;
};
} // end anonymous namespace
namespace {
// Callback to only accept typo corrections that have a non-zero edit distance.
// Also only accept corrections that have the same parent decl.
class DifferentNameValidatorCCC final : public CorrectionCandidateCallback {
public:
DifferentNameValidatorCCC(ASTContext &Context, FunctionDecl *TypoFD,
CXXRecordDecl *Parent)
: Context(Context), OriginalFD(TypoFD),
ExpectedParent(Parent ? Parent->getCanonicalDecl() : nullptr) {}
bool ValidateCandidate(const TypoCorrection &candidate) override {
if (candidate.getEditDistance() == 0)
return false;
SmallVector<unsigned, 1> MismatchedParams;
for (TypoCorrection::const_decl_iterator CDecl = candidate.begin(),
CDeclEnd = candidate.end();
CDecl != CDeclEnd; ++CDecl) {
FunctionDecl *FD = dyn_cast<FunctionDecl>(*CDecl);
if (FD && !FD->hasBody() &&
hasSimilarParameters(Context, FD, OriginalFD, MismatchedParams)) {
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD)) {
CXXRecordDecl *Parent = MD->getParent();
if (Parent && Parent->getCanonicalDecl() == ExpectedParent)
return true;
} else if (!ExpectedParent) {
return true;
}
}
}
return false;
}
std::unique_ptr<CorrectionCandidateCallback> clone() override {
return std::make_unique<DifferentNameValidatorCCC>(*this);
}
private:
ASTContext &Context;
FunctionDecl *OriginalFD;
CXXRecordDecl *ExpectedParent;
};
} // end anonymous namespace
void Sema::MarkTypoCorrectedFunctionDefinition(const NamedDecl *F) {
TypoCorrectedFunctionDefinitions.insert(F);
}
/// Generate diagnostics for an invalid function redeclaration.
///
/// This routine handles generating the diagnostic messages for an invalid
/// function redeclaration, including finding possible similar declarations
/// or performing typo correction if there are no previous declarations with
/// the same name.
///
/// Returns a NamedDecl iff typo correction was performed and substituting in
/// the new declaration name does not cause new errors.
static NamedDecl *DiagnoseInvalidRedeclaration(
Sema &SemaRef, LookupResult &Previous, FunctionDecl *NewFD,
ActOnFDArgs &ExtraArgs, bool IsLocalFriend, Scope *S) {
DeclarationName Name = NewFD->getDeclName();
DeclContext *NewDC = NewFD->getDeclContext();
SmallVector<unsigned, 1> MismatchedParams;
SmallVector<std::pair<FunctionDecl *, unsigned>, 1> NearMatches;
TypoCorrection Correction;
bool IsDefinition = ExtraArgs.D.isFunctionDefinition();
unsigned DiagMsg =
IsLocalFriend ? diag::err_no_matching_local_friend :
NewFD->getFriendObjectKind() ? diag::err_qualified_friend_no_match :
diag::err_member_decl_does_not_match;
LookupResult Prev(SemaRef, Name, NewFD->getLocation(),
IsLocalFriend ? Sema::LookupLocalFriendName
: Sema::LookupOrdinaryName,
Sema::ForVisibleRedeclaration);
NewFD->setInvalidDecl();
if (IsLocalFriend)
SemaRef.LookupName(Prev, S);
else
SemaRef.LookupQualifiedName(Prev, NewDC);
assert(!Prev.isAmbiguous() &&
"Cannot have an ambiguity in previous-declaration lookup");
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(NewFD);
DifferentNameValidatorCCC CCC(SemaRef.Context, NewFD,
MD ? MD->getParent() : nullptr);
if (!Prev.empty()) {
for (LookupResult::iterator Func = Prev.begin(), FuncEnd = Prev.end();
Func != FuncEnd; ++Func) {
FunctionDecl *FD = dyn_cast<FunctionDecl>(*Func);
if (FD &&
hasSimilarParameters(SemaRef.Context, FD, NewFD, MismatchedParams)) {
// Add 1 to the index so that 0 can mean the mismatch didn't
// involve a parameter
unsigned ParamNum =
MismatchedParams.empty() ? 0 : MismatchedParams.front() + 1;
NearMatches.push_back(std::make_pair(FD, ParamNum));
}
}
// If the qualified name lookup yielded nothing, try typo correction
} else if ((Correction = SemaRef.CorrectTypo(
Prev.getLookupNameInfo(), Prev.getLookupKind(), S,
&ExtraArgs.D.getCXXScopeSpec(), CCC, Sema::CTK_ErrorRecovery,
IsLocalFriend ? nullptr : NewDC))) {
// Set up everything for the call to ActOnFunctionDeclarator
ExtraArgs.D.SetIdentifier(Correction.getCorrectionAsIdentifierInfo(),
ExtraArgs.D.getIdentifierLoc());
Previous.clear();
Previous.setLookupName(Correction.getCorrection());
for (TypoCorrection::decl_iterator CDecl = Correction.begin(),
CDeclEnd = Correction.end();
CDecl != CDeclEnd; ++CDecl) {
FunctionDecl *FD = dyn_cast<FunctionDecl>(*CDecl);
if (FD && !FD->hasBody() &&
hasSimilarParameters(SemaRef.Context, FD, NewFD, MismatchedParams)) {
Previous.addDecl(FD);
}
}
bool wasRedeclaration = ExtraArgs.D.isRedeclaration();
NamedDecl *Result;
// Retry building the function declaration with the new previous
// declarations, and with errors suppressed.
{
// Trap errors.
Sema::SFINAETrap Trap(SemaRef);
// TODO: Refactor ActOnFunctionDeclarator so that we can call only the
// pieces needed to verify the typo-corrected C++ declaration and hopefully
// eliminate the need for the parameter pack ExtraArgs.
Result = SemaRef.ActOnFunctionDeclarator(
ExtraArgs.S, ExtraArgs.D,
Correction.getCorrectionDecl()->getDeclContext(),
NewFD->getTypeSourceInfo(), Previous, ExtraArgs.TemplateParamLists,
ExtraArgs.AddToScope);
if (Trap.hasErrorOccurred())
Result = nullptr;
}
if (Result) {
// Determine which correction we picked.
Decl *Canonical = Result->getCanonicalDecl();
for (LookupResult::iterator I = Previous.begin(), E = Previous.end();
I != E; ++I)
if ((*I)->getCanonicalDecl() == Canonical)
Correction.setCorrectionDecl(*I);
// Let Sema know about the correction.
SemaRef.MarkTypoCorrectedFunctionDefinition(Result);
SemaRef.diagnoseTypo(
Correction,
SemaRef.PDiag(IsLocalFriend
? diag::err_no_matching_local_friend_suggest
: diag::err_member_decl_does_not_match_suggest)
<< Name << NewDC << IsDefinition);
return Result;
}
// Pretend the typo correction never occurred
ExtraArgs.D.SetIdentifier(Name.getAsIdentifierInfo(),
ExtraArgs.D.getIdentifierLoc());
ExtraArgs.D.setRedeclaration(wasRedeclaration);
Previous.clear();
Previous.setLookupName(Name);
}
SemaRef.Diag(NewFD->getLocation(), DiagMsg)
<< Name << NewDC << IsDefinition << NewFD->getLocation();
bool NewFDisConst = false;
if (CXXMethodDecl *NewMD = dyn_cast<CXXMethodDecl>(NewFD))
NewFDisConst = NewMD->isConst();
for (SmallVectorImpl<std::pair<FunctionDecl *, unsigned> >::iterator
NearMatch = NearMatches.begin(), NearMatchEnd = NearMatches.end();
NearMatch != NearMatchEnd; ++NearMatch) {
FunctionDecl *FD = NearMatch->first;
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
bool FDisConst = MD && MD->isConst();
bool IsMember = MD || !IsLocalFriend;
// FIXME: These notes are poorly worded for the local friend case.
if (unsigned Idx = NearMatch->second) {
ParmVarDecl *FDParam = FD->getParamDecl(Idx-1);
SourceLocation Loc = FDParam->getTypeSpecStartLoc();
if (Loc.isInvalid()) Loc = FD->getLocation();
SemaRef.Diag(Loc, IsMember ? diag::note_member_def_close_param_match
: diag::note_local_decl_close_param_match)
<< Idx << FDParam->getType()
<< NewFD->getParamDecl(Idx - 1)->getType();
} else if (FDisConst != NewFDisConst) {
SemaRef.Diag(FD->getLocation(), diag::note_member_def_close_const_match)
<< NewFDisConst << FD->getSourceRange().getEnd()
<< (NewFDisConst
? FixItHint::CreateRemoval(ExtraArgs.D.getFunctionTypeInfo()
.getConstQualifierLoc())
: FixItHint::CreateInsertion(ExtraArgs.D.getFunctionTypeInfo()
.getRParenLoc()
.getLocWithOffset(1),
" const"));
} else
SemaRef.Diag(FD->getLocation(),
IsMember ? diag::note_member_def_close_match
: diag::note_local_decl_close_match);
}
return nullptr;
}
static StorageClass getFunctionStorageClass(Sema &SemaRef, Declarator &D) {
switch (D.getDeclSpec().getStorageClassSpec()) {
default: llvm_unreachable("Unknown storage class!");
case DeclSpec::SCS_auto:
case DeclSpec::SCS_register:
case DeclSpec::SCS_mutable:
SemaRef.Diag(D.getDeclSpec().getStorageClassSpecLoc(),
diag::err_typecheck_sclass_func);
D.getMutableDeclSpec().ClearStorageClassSpecs();
D.setInvalidType();
break;
case DeclSpec::SCS_unspecified: break;
case DeclSpec::SCS_extern:
if (D.getDeclSpec().isExternInLinkageSpec())
return SC_None;
return SC_Extern;
case DeclSpec::SCS_static: {
if (SemaRef.CurContext->getRedeclContext()->isFunctionOrMethod()) {
// C99 6.7.1p5:
// The declaration of an identifier for a function that has
// block scope shall have no explicit storage-class specifier
// other than extern
// See also (C++ [dcl.stc]p4).
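// Illustrative (hypothetical code): a block-scope declaration such as
//   void f() { static void g(); }
// is rejected here with err_static_block_func.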
SemaRef.Diag(D.getDeclSpec().getStorageClassSpecLoc(),
diag::err_static_block_func);
break;
} else
return SC_Static;
}
case DeclSpec::SCS_private_extern: return SC_PrivateExtern;
}
// Any explicit storage class has already been returned above.
return SC_None;
}
static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
DeclContext *DC, QualType &R,
TypeSourceInfo *TInfo,
StorageClass SC,
bool &IsVirtualOkay) {
DeclarationNameInfo NameInfo = SemaRef.GetNameForDeclarator(D);
DeclarationName Name = NameInfo.getName();
FunctionDecl *NewFD = nullptr;
bool isInline = D.getDeclSpec().isInlineSpecified();
if (!SemaRef.getLangOpts().CPlusPlus) {
// Determine whether the function was written with a prototype. This is
// true when:
// - there is a prototype in the declarator, or
// - the type R of the function is some kind of typedef or other non-
// attributed reference to a type name (which eventually refers to a
// function type). Note, we can't always look at the adjusted type to
// check this case because attributes may cause a non-function
// declarator to still have a function type. e.g.,
// typedef void func(int a);
// __attribute__((noreturn)) func other_func; // This has a prototype
bool HasPrototype =
(D.isFunctionDeclarator() && D.getFunctionTypeInfo().hasPrototype) ||
(D.getDeclSpec().isTypeRep() &&
D.getDeclSpec().getRepAsType().get()->isFunctionProtoType()) ||
(!R->getAsAdjusted<FunctionType>() && R->isFunctionProtoType());
assert(
(HasPrototype || !SemaRef.getLangOpts().requiresStrictPrototypes()) &&
"Strict prototypes are required");
NewFD = FunctionDecl::Create(
SemaRef.Context, DC, D.getBeginLoc(), NameInfo, R, TInfo, SC,
SemaRef.getCurFPFeatures().isFPConstrained(), isInline, HasPrototype,
ConstexprSpecKind::Unspecified,
/*TrailingRequiresClause=*/nullptr);
if (D.isInvalidType())
NewFD->setInvalidDecl();
return NewFD;
}
ExplicitSpecifier ExplicitSpecifier = D.getDeclSpec().getExplicitSpecifier();
ConstexprSpecKind ConstexprKind = D.getDeclSpec().getConstexprSpecifier();
if (ConstexprKind == ConstexprSpecKind::Constinit) {
SemaRef.Diag(D.getDeclSpec().getConstexprSpecLoc(),
diag::err_constexpr_wrong_decl_kind)
<< static_cast<int>(ConstexprKind);
ConstexprKind = ConstexprSpecKind::Unspecified;
D.getMutableDeclSpec().ClearConstexprSpec();
}
Expr *TrailingRequiresClause = D.getTrailingRequiresClause();
// Check that the return type is not an abstract class type.
// For record types, this is done by the AbstractClassUsageDiagnoser once
// the class has been completely parsed.
if (!DC->isRecord() &&
SemaRef.RequireNonAbstractType(
D.getIdentifierLoc(), R->castAs<FunctionType>()->getReturnType(),
diag::err_abstract_type_in_decl, SemaRef.AbstractReturnType))
D.setInvalidType();
if (Name.getNameKind() == DeclarationName::CXXConstructorName) {
// This is a C++ constructor declaration.
assert(DC->isRecord() &&
"Constructors can only be declared in a member context");
R = SemaRef.CheckConstructorDeclarator(D, R, SC);
return CXXConstructorDecl::Create(
SemaRef.Context, cast<CXXRecordDecl>(DC), D.getBeginLoc(), NameInfo, R,
TInfo, ExplicitSpecifier, SemaRef.getCurFPFeatures().isFPConstrained(),
isInline, /*isImplicitlyDeclared=*/false, ConstexprKind,
InheritedConstructor(), TrailingRequiresClause);
} else if (Name.getNameKind() == DeclarationName::CXXDestructorName) {
// This is a C++ destructor declaration.
if (DC->isRecord()) {
R = SemaRef.CheckDestructorDeclarator(D, R, SC);
CXXRecordDecl *Record = cast<CXXRecordDecl>(DC);
CXXDestructorDecl *NewDD = CXXDestructorDecl::Create(
SemaRef.Context, Record, D.getBeginLoc(), NameInfo, R, TInfo,
SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
/*isImplicitlyDeclared=*/false, ConstexprKind,
TrailingRequiresClause);
// User-defined destructors start as not selected if the class definition is
// not yet complete.
if (Record->isBeingDefined())
NewDD->setIneligibleOrNotSelected(true);
// If the destructor needs an implicit exception specification, set it
// now. FIXME: It'd be nice to be able to create the right type to start
// with, but the type needs to reference the destructor declaration.
if (SemaRef.getLangOpts().CPlusPlus11)
SemaRef.AdjustDestructorExceptionSpec(NewDD);
IsVirtualOkay = true;
return NewDD;
} else {
SemaRef.Diag(D.getIdentifierLoc(), diag::err_destructor_not_member);
D.setInvalidType();
// Create a FunctionDecl to satisfy the function definition parsing
// code path.
return FunctionDecl::Create(
SemaRef.Context, DC, D.getBeginLoc(), D.getIdentifierLoc(), Name, R,
TInfo, SC, SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
/*hasPrototype=*/true, ConstexprKind, TrailingRequiresClause);
}
} else if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName) {
if (!DC->isRecord()) {
SemaRef.Diag(D.getIdentifierLoc(),
diag::err_conv_function_not_member);
return nullptr;
}
SemaRef.CheckConversionDeclarator(D, R, SC);
if (D.isInvalidType())
return nullptr;
IsVirtualOkay = true;
return CXXConversionDecl::Create(
SemaRef.Context, cast<CXXRecordDecl>(DC), D.getBeginLoc(), NameInfo, R,
TInfo, SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
ExplicitSpecifier, ConstexprKind, SourceLocation(),
TrailingRequiresClause);
} else if (Name.getNameKind() == DeclarationName::CXXDeductionGuideName) {
if (TrailingRequiresClause)
SemaRef.Diag(TrailingRequiresClause->getBeginLoc(),
diag::err_trailing_requires_clause_on_deduction_guide)
<< TrailingRequiresClause->getSourceRange();
SemaRef.CheckDeductionGuideDeclarator(D, R, SC);
return CXXDeductionGuideDecl::Create(SemaRef.Context, DC, D.getBeginLoc(),
ExplicitSpecifier, NameInfo, R, TInfo,
D.getEndLoc());
} else if (DC->isRecord()) {
// If the name of the function is the same as the name of the record,
// then this must be an invalid constructor that has a return type.
// (The parser checks for a return type and makes the declarator a
// constructor if it has no return type).
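// Illustrative (hypothetical code): 'struct S { int S(); };' reaches this
// check, since the declarator name 'S' matches the record name.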
if (Name.getAsIdentifierInfo() &&
Name.getAsIdentifierInfo() == cast<CXXRecordDecl>(DC)->getIdentifier()){
SemaRef.Diag(D.getIdentifierLoc(), diag::err_constructor_return_type)
<< SourceRange(D.getDeclSpec().getTypeSpecTypeLoc())
<< SourceRange(D.getIdentifierLoc());
return nullptr;
}
// This is a C++ method declaration.
CXXMethodDecl *Ret = CXXMethodDecl::Create(
SemaRef.Context, cast<CXXRecordDecl>(DC), D.getBeginLoc(), NameInfo, R,
TInfo, SC, SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
ConstexprKind, SourceLocation(), TrailingRequiresClause);
IsVirtualOkay = !Ret->isStatic();
return Ret;
} else {
bool isFriend =
SemaRef.getLangOpts().CPlusPlus && D.getDeclSpec().isFriendSpecified();
if (!isFriend && SemaRef.CurContext->isRecord())
return nullptr;
// Determine whether the function was written with a
// prototype. This is true when:
// - we're in C++ (where every function has a prototype),
return FunctionDecl::Create(
SemaRef.Context, DC, D.getBeginLoc(), NameInfo, R, TInfo, SC,
SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
true /*HasPrototype*/, ConstexprKind, TrailingRequiresClause);
}
}
enum OpenCLParamType {
ValidKernelParam,
PtrPtrKernelParam,
PtrKernelParam,
InvalidAddrSpacePtrKernelParam,
InvalidKernelParam,
RecordKernelParam
};
static bool isOpenCLSizeDependentType(ASTContext &C, QualType Ty) {
// Size dependent types are just typedefs to normal integer types
// (e.g. unsigned long), so we cannot distinguish them from other typedefs to
// integers other than by their names.
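// Illustrative assumption about typical OpenCL headers: a parameter declared
// as 'size_t n' desugars through the 'size_t' typedef, whose name matches the
// list below, so it is flagged as size dependent.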
StringRef SizeTypeNames[] = {"size_t", "intptr_t", "uintptr_t", "ptrdiff_t"};
// Remove typedefs one by one until we reach a typedef
// for a size dependent type.
QualType DesugaredTy = Ty;
do {
ArrayRef<StringRef> Names(SizeTypeNames);
auto Match = llvm::find(Names, DesugaredTy.getUnqualifiedType().getAsString());
if (Names.end() != Match)
return true;
Ty = DesugaredTy;
DesugaredTy = Ty.getSingleStepDesugaredType(C);
} while (DesugaredTy != Ty);
return false;
}
static OpenCLParamType getOpenCLKernelParameterType(Sema &S, QualType PT) {
if (PT->isDependentType())
return InvalidKernelParam;
if (PT->isPointerType() || PT->isReferenceType()) {
QualType PointeeType = PT->getPointeeType();
if (PointeeType.getAddressSpace() == LangAS::opencl_generic ||
PointeeType.getAddressSpace() == LangAS::opencl_private ||
PointeeType.getAddressSpace() == LangAS::Default)
return InvalidAddrSpacePtrKernelParam;
if (PointeeType->isPointerType()) {
// This is a pointer to pointer parameter.
// Recursively check inner type.
OpenCLParamType ParamKind = getOpenCLKernelParameterType(S, PointeeType);
if (ParamKind == InvalidAddrSpacePtrKernelParam ||
ParamKind == InvalidKernelParam)
return ParamKind;
return PtrPtrKernelParam;
}
// C++ for OpenCL v1.0 s2.4:
// Moreover the types used in parameters of the kernel functions must be:
// Standard layout types for pointer parameters. The same applies to
// reference if an implementation supports them in kernel parameters.
if (S.getLangOpts().OpenCLCPlusPlus &&
!S.getOpenCLOptions().isAvailableOption(
"__cl_clang_non_portable_kernel_param_types", S.getLangOpts())) {
auto CXXRec = PointeeType.getCanonicalType()->getAsCXXRecordDecl();
bool IsStandardLayoutType = true;
if (CXXRec) {
// If a template type is not ODR-used, its definition is only available
// in the template definition, not in its instantiation.
// FIXME: This logic doesn't work for types that depend on a template
// parameter (PR58590).
if (!CXXRec->hasDefinition())
CXXRec = CXXRec->getTemplateInstantiationPattern();
if (!CXXRec || !CXXRec->hasDefinition() || !CXXRec->isStandardLayout())
IsStandardLayoutType = false;
}
if (!PointeeType->isAtomicType() && !PointeeType->isVoidType() &&
!IsStandardLayoutType)
return InvalidKernelParam;
}
return PtrKernelParam;
}
// OpenCL v1.2 s6.9.k:
// Arguments to kernel functions in a program cannot be declared with the
// built-in scalar types bool, half, size_t, ptrdiff_t, intptr_t, and
// uintptr_t or a struct and/or union that contain fields declared to be one
// of these built-in scalar types.
if (isOpenCLSizeDependentType(S.getASTContext(), PT))
return InvalidKernelParam;
if (PT->isImageType())
return PtrKernelParam;
if (PT->isBooleanType() || PT->isEventT() || PT->isReserveIDT())
return InvalidKernelParam;
// OpenCL extension spec v1.2 s9.5:
// This extension adds support for half scalar and vector types as built-in
// types that can be used for arithmetic operations, conversions etc.
if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp16", S.getLangOpts()) &&
PT->isHalfType())
return InvalidKernelParam;
// Look into an array argument to check if it has a forbidden type.
if (PT->isArrayType()) {
const Type *UnderlyingTy = PT->getPointeeOrArrayElementType();
// Call ourself to check an underlying type of an array. Since the
// getPointeeOrArrayElementType returns an innermost type which is not an
// array, this recursive call only happens once.
return getOpenCLKernelParameterType(S, QualType(UnderlyingTy, 0));
}
// C++ for OpenCL v1.0 s2.4:
// Moreover the types used in parameters of the kernel functions must be:
// Trivial and standard-layout types C++17 [basic.types] (plain old data
// types) for parameters passed by value;
if (S.getLangOpts().OpenCLCPlusPlus &&
!S.getOpenCLOptions().isAvailableOption(
"__cl_clang_non_portable_kernel_param_types", S.getLangOpts()) &&
!PT->isOpenCLSpecificType() && !PT.isPODType(S.Context))
return InvalidKernelParam;
if (PT->isRecordType())
return RecordKernelParam;
return ValidKernelParam;
}
static void checkIsValidOpenCLKernelParameter(
Sema &S,
Declarator &D,
ParmVarDecl *Param,
llvm::SmallPtrSetImpl<const Type *> &ValidTypes) {
QualType PT = Param->getType();
// Cache the valid types we encounter to avoid rechecking structs that are
// used again
if (ValidTypes.count(PT.getTypePtr()))
return;
switch (getOpenCLKernelParameterType(S, PT)) {
case PtrPtrKernelParam:
// OpenCL v3.0 s6.11.a:
// A kernel function argument cannot be declared as a pointer to a pointer
// type. [...] This restriction only applies to OpenCL C 1.2 or below.
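// Illustrative (hypothetical kernel): 'kernel void k(global int **p);' is
// rejected here for OpenCL C 1.2 and below, and is not diagnosed on this
// path for later versions.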
if (S.getLangOpts().getOpenCLCompatibleVersion() <= 120) {
S.Diag(Param->getLocation(), diag::err_opencl_ptrptr_kernel_param);
D.setInvalidType();
return;
}
ValidTypes.insert(PT.getTypePtr());
return;
case InvalidAddrSpacePtrKernelParam:
// OpenCL v1.0 s6.5:
// __kernel function arguments declared to be a pointer of a type can point
// to one of the following address spaces only : __global, __local or
// __constant.
S.Diag(Param->getLocation(), diag::err_kernel_arg_address_space);
D.setInvalidType();
return;
// OpenCL v1.2 s6.9.k:
// Arguments to kernel functions in a program cannot be declared with the
// built-in scalar types bool, half, size_t, ptrdiff_t, intptr_t, and
// uintptr_t or a struct and/or union that contain fields declared to be
// one of these built-in scalar types.
case InvalidKernelParam:
// OpenCL v1.2 s6.8 n:
// A kernel function argument cannot be declared
// of event_t type.
// Do not diagnose half type since it is diagnosed as invalid argument
// type for any function elsewhere.
if (!PT->isHalfType()) {
S.Diag(Param->getLocation(), diag::err_bad_kernel_param_type) << PT;
// Explain what typedefs are involved.
const TypedefType *Typedef = nullptr;
while ((Typedef = PT->getAs<TypedefType>())) {
SourceLocation Loc = Typedef->getDecl()->getLocation();
// SourceLocation may be invalid for a built-in type.
if (Loc.isValid())
S.Diag(Loc, diag::note_entity_declared_at) << PT;
PT = Typedef->desugar();
}
}
D.setInvalidType();
return;
case PtrKernelParam:
case ValidKernelParam:
ValidTypes.insert(PT.getTypePtr());
return;
case RecordKernelParam:
break;
}
// Track nested structs we will inspect
SmallVector<const Decl *, 4> VisitStack;
// Track where we are in the nested structs. Items will migrate from
// VisitStack to HistoryStack as we do the DFS looking for a bad field.
SmallVector<const FieldDecl *, 4> HistoryStack;
HistoryStack.push_back(nullptr);
// At this point we have already handled everything except a RecordType or
// an ArrayType of a RecordType.
assert((PT->isArrayType() || PT->isRecordType()) && "Unexpected type.");
const RecordType *RecTy =
PT->getPointeeOrArrayElementType()->getAs<RecordType>();
const RecordDecl *OrigRecDecl = RecTy->getDecl();
VisitStack.push_back(RecTy->getDecl());
assert(VisitStack.back() && "First decl null?");
do {
const Decl *Next = VisitStack.pop_back_val();
if (!Next) {
assert(!HistoryStack.empty());
// Found a marker, we have gone up a level
if (const FieldDecl *Hist = HistoryStack.pop_back_val())
ValidTypes.insert(Hist->getType().getTypePtr());
continue;
}
// Adds everything except the original parameter declaration (which is not a
// field itself) to the history stack.
const RecordDecl *RD;
if (const FieldDecl *Field = dyn_cast<FieldDecl>(Next)) {
HistoryStack.push_back(Field);
QualType FieldTy = Field->getType();
// Other field types (known to be valid or invalid) are handled while we
// walk around RecordDecl::fields().
assert((FieldTy->isArrayType() || FieldTy->isRecordType()) &&
"Unexpected type.");
const Type *FieldRecTy = FieldTy->getPointeeOrArrayElementType();
RD = FieldRecTy->castAs<RecordType>()->getDecl();
} else {
RD = cast<RecordDecl>(Next);
}
// Add a null marker so we know when we've gone back up a level
VisitStack.push_back(nullptr);
for (const auto *FD : RD->fields()) {
QualType QT = FD->getType();
if (ValidTypes.count(QT.getTypePtr()))
continue;
OpenCLParamType ParamType = getOpenCLKernelParameterType(S, QT);
if (ParamType == ValidKernelParam)
continue;
if (ParamType == RecordKernelParam) {
VisitStack.push_back(FD);
continue;
}
// OpenCL v1.2 s6.9.p:
// Arguments to kernel functions that are declared to be a struct or union
// do not allow OpenCL objects to be passed as elements of the struct or
// union.
if (ParamType == PtrKernelParam || ParamType == PtrPtrKernelParam ||
ParamType == InvalidAddrSpacePtrKernelParam) {
S.Diag(Param->getLocation(),
diag::err_record_with_pointers_kernel_param)
<< PT->isUnionType()
<< PT;
} else {
S.Diag(Param->getLocation(), diag::err_bad_kernel_param_type) << PT;
}
S.Diag(OrigRecDecl->getLocation(), diag::note_within_field_of_type)
<< OrigRecDecl->getDeclName();
// We have an error, now let's go back up through history and show where
// the offending field came from
for (ArrayRef<const FieldDecl *>::const_iterator
I = HistoryStack.begin() + 1,
E = HistoryStack.end();
I != E; ++I) {
const FieldDecl *OuterField = *I;
S.Diag(OuterField->getLocation(), diag::note_within_field_of_type)
<< OuterField->getType();
}
S.Diag(FD->getLocation(), diag::note_illegal_field_declared_here)
<< QT->isPointerType()
<< QT;
D.setInvalidType();
return;
}
} while (!VisitStack.empty());
}
/// Find the DeclContext in which a tag is implicitly declared if we see an
/// elaborated type specifier in the specified context, and lookup finds
/// nothing.
static DeclContext *getTagInjectionContext(DeclContext *DC) {
while (!DC->isFileContext() && !DC->isFunctionOrMethod())
DC = DC->getParent();
return DC;
}
/// Find the Scope in which a tag is implicitly declared if we see an
/// elaborated type specifier in the specified context, and lookup finds
/// nothing.
static Scope *getTagInjectionScope(Scope *S, const LangOptions &LangOpts) {
while (S->isClassScope() ||
(LangOpts.CPlusPlus &&
S->isFunctionPrototypeScope()) ||
((S->getFlags() & Scope::DeclScope) == 0) ||
(S->getEntity() && S->getEntity()->isTransparentContext()))
S = S->getParent();
return S;
}
/// Determine whether a declaration matches a known function in namespace std.
static bool isStdBuiltin(ASTContext &Ctx, FunctionDecl *FD,
unsigned BuiltinID) {
switch (BuiltinID) {
case Builtin::BI__GetExceptionInfo:
// No type checking whatsoever.
return Ctx.getTargetInfo().getCXXABI().isMicrosoft();
case Builtin::BIaddressof:
case Builtin::BI__addressof:
case Builtin::BIforward:
case Builtin::BImove:
case Builtin::BImove_if_noexcept:
case Builtin::BIas_const: {
// Ensure that we don't treat the algorithm
// OutputIt std::move(InputIt, InputIt, OutputIt)
// as the builtin std::move.
const auto *FPT = FD->getType()->castAs<FunctionProtoType>();
return FPT->getNumParams() == 1 && !FPT->isVariadic();
}
default:
return false;
}
}
NamedDecl*
Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
TypeSourceInfo *TInfo, LookupResult &Previous,
MultiTemplateParamsArg TemplateParamListsRef,
bool &AddToScope) {
QualType R = TInfo->getType();
assert(R->isFunctionType());
if (R.getCanonicalType()->castAs<FunctionType>()->getCmseNSCallAttr())
Diag(D.getIdentifierLoc(), diag::err_function_decl_cmse_ns_call);
SmallVector<TemplateParameterList *, 4> TemplateParamLists;
llvm::append_range(TemplateParamLists, TemplateParamListsRef);
if (TemplateParameterList *Invented = D.getInventedTemplateParameterList()) {
if (!TemplateParamLists.empty() &&
Invented->getDepth() == TemplateParamLists.back()->getDepth())
TemplateParamLists.back() = Invented;
else
TemplateParamLists.push_back(Invented);
}
// TODO: consider using NameInfo for diagnostic.
DeclarationNameInfo NameInfo = GetNameForDeclarator(D);
DeclarationName Name = NameInfo.getName();
StorageClass SC = getFunctionStorageClass(*this, D);
if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_invalid_thread)
<< DeclSpec::getSpecifierName(TSCS);
if (D.isFirstDeclarationOfMember())
adjustMemberFunctionCC(R, D.isStaticMember(), D.isCtorOrDtor(),
D.getIdentifierLoc());
bool isFriend = false;
FunctionTemplateDecl *FunctionTemplate = nullptr;
bool isMemberSpecialization = false;
bool isFunctionTemplateSpecialization = false;
bool isDependentClassScopeExplicitSpecialization = false;
bool HasExplicitTemplateArgs = false;
TemplateArgumentListInfo TemplateArgs;
bool isVirtualOkay = false;
DeclContext *OriginalDC = DC;
bool IsLocalExternDecl = adjustContextForLocalExternDecl(DC);
FunctionDecl *NewFD = CreateNewFunctionDecl(*this, D, DC, R, TInfo, SC,
isVirtualOkay);
if (!NewFD) return nullptr;
if (OriginalLexicalContext && OriginalLexicalContext->isObjCContainer())
NewFD->setTopLevelDeclInObjCContainer();
// Set the lexical context. If this is a function-scope declaration, or has a
// C++ scope specifier, or is the object of a friend declaration, the lexical
// context will be different from the semantic context.
NewFD->setLexicalDeclContext(CurContext);
if (IsLocalExternDecl)
NewFD->setLocalExternDecl();
if (getLangOpts().CPlusPlus) {
// The rules for implicit inlines changed in C++20 for methods and friends
// with an in-class definition (when such a definition is not attached to
// the global module). User-specified 'inline' overrides this (set when
// the function decl is created above).
// FIXME: We need a better way to separate C++ standard and clang modules.
bool ImplicitInlineCXX20 = !getLangOpts().CPlusPlusModules ||
!NewFD->getOwningModule() ||
NewFD->getOwningModule()->isGlobalModule() ||
NewFD->getOwningModule()->isHeaderLikeModule();
bool isInline = D.getDeclSpec().isInlineSpecified();
bool isVirtual = D.getDeclSpec().isVirtualSpecified();
bool hasExplicit = D.getDeclSpec().hasExplicitSpecifier();
isFriend = D.getDeclSpec().isFriendSpecified();
if (isFriend && !isInline && D.isFunctionDefinition()) {
// Pre-C++20 [class.friend]p5
// A function can be defined in a friend declaration of a
// class . . . . Such a function is implicitly inline.
// Post C++20 [class.friend]p7
// Such a function is implicitly an inline function if it is attached
// to the global module.
NewFD->setImplicitlyInline(ImplicitInlineCXX20);
}
// If this is a method defined in an __interface, and is not a constructor
// or an overloaded operator, then set the pure flag (isVirtual will already
// return true).
if (const CXXRecordDecl *Parent =
dyn_cast<CXXRecordDecl>(NewFD->getDeclContext())) {
if (Parent->isInterface() && cast<CXXMethodDecl>(NewFD)->isUserProvided())
NewFD->setPure(true);
// C++ [class.union]p2
// A union can have member functions, but not virtual functions.
if (isVirtual && Parent->isUnion()) {
Diag(D.getDeclSpec().getVirtualSpecLoc(), diag::err_virtual_in_union);
NewFD->setInvalidDecl();
}
if ((Parent->isClass() || Parent->isStruct()) &&
Parent->hasAttr<SYCLSpecialClassAttr>() &&
NewFD->getKind() == Decl::Kind::CXXMethod && NewFD->getIdentifier() &&
NewFD->getName() == "__init" && D.isFunctionDefinition()) {
if (auto *Def = Parent->getDefinition())
Def->setInitMethod(true);
}
}
SetNestedNameSpecifier(*this, NewFD, D);
isMemberSpecialization = false;
isFunctionTemplateSpecialization = false;
if (D.isInvalidType())
NewFD->setInvalidDecl();
// Match up the template parameter lists with the scope specifier, then
// determine whether we have a template or a template specialization.
bool Invalid = false;
TemplateParameterList *TemplateParams =
MatchTemplateParametersToScopeSpecifier(
D.getDeclSpec().getBeginLoc(), D.getIdentifierLoc(),
D.getCXXScopeSpec(),
D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId
? D.getName().TemplateId
: nullptr,
TemplateParamLists, isFriend, isMemberSpecialization,
Invalid);
if (TemplateParams) {
// Check that we can declare a template here.
if (CheckTemplateDeclScope(S, TemplateParams))
NewFD->setInvalidDecl();
if (TemplateParams->size() > 0) {
// This is a function template
// A destructor cannot be a template.
if (Name.getNameKind() == DeclarationName::CXXDestructorName) {
Diag(NewFD->getLocation(), diag::err_destructor_template);
NewFD->setInvalidDecl();
}
// If we're adding a template to a dependent context, we may need to
// rebuild some of the types used within the template parameter list,
// now that we know what the current instantiation is.
if (DC->isDependentContext()) {
ContextRAII SavedContext(*this, DC);
if (RebuildTemplateParamsInCurrentInstantiation(TemplateParams))
Invalid = true;
}
FunctionTemplate = FunctionTemplateDecl::Create(Context, DC,
NewFD->getLocation(),
Name, TemplateParams,
NewFD);
FunctionTemplate->setLexicalDeclContext(CurContext);
NewFD->setDescribedFunctionTemplate(FunctionTemplate);
// For source fidelity, store the other template param lists.
if (TemplateParamLists.size() > 1) {
NewFD->setTemplateParameterListsInfo(Context,
ArrayRef<TemplateParameterList *>(TemplateParamLists)
.drop_back(1));
}
} else {
// This is a function template specialization.
isFunctionTemplateSpecialization = true;
// For source fidelity, store all the template param lists.
if (TemplateParamLists.size() > 0)
NewFD->setTemplateParameterListsInfo(Context, TemplateParamLists);
// C++0x [temp.expl.spec]p20 forbids "template<> friend void foo(int);".
if (isFriend) {
// We want to remove the "template<>", found here.
SourceRange RemoveRange = TemplateParams->getSourceRange();
// If we remove the template<> and the name is not a
// template-id, we're actually silently creating a problem:
// the friend declaration will refer to an untemplated decl,
// and clearly the user wants a template specialization. So
// we need to insert '<>' after the name.
SourceLocation InsertLoc;
if (D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId) {
InsertLoc = D.getName().getSourceRange().getEnd();
InsertLoc = getLocForEndOfToken(InsertLoc);
}
Diag(D.getIdentifierLoc(), diag::err_template_spec_decl_friend)
<< Name << RemoveRange
<< FixItHint::CreateRemoval(RemoveRange)
<< FixItHint::CreateInsertion(InsertLoc, "<>");
Invalid = true;
}
}
} else {
// Check that we can declare a template here.
if (!TemplateParamLists.empty() && isMemberSpecialization &&
CheckTemplateDeclScope(S, TemplateParamLists.back()))
NewFD->setInvalidDecl();
// All template param lists were matched against the scope specifier:
// this is NOT (an explicit specialization of) a template.
if (TemplateParamLists.size() > 0)
// For source fidelity, store all the template param lists.
NewFD->setTemplateParameterListsInfo(Context, TemplateParamLists);
}
if (Invalid) {
NewFD->setInvalidDecl();
if (FunctionTemplate)
FunctionTemplate->setInvalidDecl();
}
// C++ [dcl.fct.spec]p5:
// The virtual specifier shall only be used in declarations of
// nonstatic class member functions that appear within a
// member-specification of a class declaration; see 10.3.
//
if (isVirtual && !NewFD->isInvalidDecl()) {
if (!isVirtualOkay) {
Diag(D.getDeclSpec().getVirtualSpecLoc(),
diag::err_virtual_non_function);
} else if (!CurContext->isRecord()) {
// 'virtual' was specified outside of the class.
Diag(D.getDeclSpec().getVirtualSpecLoc(),
diag::err_virtual_out_of_class)
<< FixItHint::CreateRemoval(D.getDeclSpec().getVirtualSpecLoc());
} else if (NewFD->getDescribedFunctionTemplate()) {
// C++ [temp.mem]p3:
// A member function template shall not be virtual.
Diag(D.getDeclSpec().getVirtualSpecLoc(),
diag::err_virtual_member_function_template)
<< FixItHint::CreateRemoval(D.getDeclSpec().getVirtualSpecLoc());
} else {
// Okay: Add virtual to the method.
NewFD->setVirtualAsWritten(true);
}
if (getLangOpts().CPlusPlus14 &&
NewFD->getReturnType()->isUndeducedType())
Diag(D.getDeclSpec().getVirtualSpecLoc(), diag::err_auto_fn_virtual);
}
if (getLangOpts().CPlusPlus14 &&
(NewFD->isDependentContext() ||
(isFriend && CurContext->isDependentContext())) &&
NewFD->getReturnType()->isUndeducedType()) {
// If the function template is referenced directly (for instance, as a
// member of the current instantiation), pretend it has a dependent type.
// This is not really justified by the standard, but is the only sane
// thing to do.
// FIXME: For a friend function, we have not marked the function as being
// a friend yet, so 'isDependentContext' on the FD doesn't work.
const FunctionProtoType *FPT =
NewFD->getType()->castAs<FunctionProtoType>();
QualType Result = SubstAutoTypeDependent(FPT->getReturnType());
NewFD->setType(Context.getFunctionType(Result, FPT->getParamTypes(),
FPT->getExtProtoInfo()));
}
// C++ [dcl.fct.spec]p3:
// The inline specifier shall not appear on a block scope function
// declaration.
if (isInline && !NewFD->isInvalidDecl()) {
if (CurContext->isFunctionOrMethod()) {
// 'inline' is not allowed on block scope function declaration.
Diag(D.getDeclSpec().getInlineSpecLoc(),
diag::err_inline_declaration_block_scope) << Name
<< FixItHint::CreateRemoval(D.getDeclSpec().getInlineSpecLoc());
}
}
// C++ [dcl.fct.spec]p6:
// The explicit specifier shall be used only in the declaration of a
// constructor or conversion function within its class definition;
// see 12.3.1 and 12.3.2.
if (hasExplicit && !NewFD->isInvalidDecl() &&
!isa<CXXDeductionGuideDecl>(NewFD)) {
if (!CurContext->isRecord()) {
// 'explicit' was specified outside of the class.
Diag(D.getDeclSpec().getExplicitSpecLoc(),
diag::err_explicit_out_of_class)
<< FixItHint::CreateRemoval(D.getDeclSpec().getExplicitSpecRange());
} else if (!isa<CXXConstructorDecl>(NewFD) &&
!isa<CXXConversionDecl>(NewFD)) {
// 'explicit' was specified on a function that wasn't a constructor
// or conversion function.
Diag(D.getDeclSpec().getExplicitSpecLoc(),
diag::err_explicit_non_ctor_or_conv_function)
<< FixItHint::CreateRemoval(D.getDeclSpec().getExplicitSpecRange());
}
}
ConstexprSpecKind ConstexprKind = D.getDeclSpec().getConstexprSpecifier();
if (ConstexprKind != ConstexprSpecKind::Unspecified) {
// C++11 [dcl.constexpr]p2: constexpr functions and constexpr constructors
// are implicitly inline.
NewFD->setImplicitlyInline();
// C++11 [dcl.constexpr]p3: functions declared constexpr are required to
// be either constructors or to return a literal type. Therefore,
// destructors cannot be declared constexpr.
if (isa<CXXDestructorDecl>(NewFD) &&
(!getLangOpts().CPlusPlus20 ||
ConstexprKind == ConstexprSpecKind::Consteval)) {
Diag(D.getDeclSpec().getConstexprSpecLoc(), diag::err_constexpr_dtor)
<< static_cast<int>(ConstexprKind);
NewFD->setConstexprKind(getLangOpts().CPlusPlus20
? ConstexprSpecKind::Unspecified
: ConstexprSpecKind::Constexpr);
}
// C++20 [dcl.constexpr]p2: An allocation function, or a
// deallocation function shall not be declared with the consteval
// specifier.
if (ConstexprKind == ConstexprSpecKind::Consteval &&
(NewFD->getOverloadedOperator() == OO_New ||
NewFD->getOverloadedOperator() == OO_Array_New ||
NewFD->getOverloadedOperator() == OO_Delete ||
NewFD->getOverloadedOperator() == OO_Array_Delete)) {
Diag(D.getDeclSpec().getConstexprSpecLoc(),
diag::err_invalid_consteval_decl_kind)
<< NewFD;
NewFD->setConstexprKind(ConstexprSpecKind::Constexpr);
}
}
// If __module_private__ was specified, mark the function accordingly.
if (D.getDeclSpec().isModulePrivateSpecified()) {
if (isFunctionTemplateSpecialization) {
SourceLocation ModulePrivateLoc
= D.getDeclSpec().getModulePrivateSpecLoc();
Diag(ModulePrivateLoc, diag::err_module_private_specialization)
<< 0
<< FixItHint::CreateRemoval(ModulePrivateLoc);
} else {
NewFD->setModulePrivate();
if (FunctionTemplate)
FunctionTemplate->setModulePrivate();
}
}
if (isFriend) {
if (FunctionTemplate) {
FunctionTemplate->setObjectOfFriendDecl();
FunctionTemplate->setAccess(AS_public);
}
NewFD->setObjectOfFriendDecl();
NewFD->setAccess(AS_public);
}
// If a function is defined as defaulted or deleted, mark it as such now.
// We'll do the relevant checks on defaulted / deleted functions later.
switch (D.getFunctionDefinitionKind()) {
case FunctionDefinitionKind::Declaration:
case FunctionDefinitionKind::Definition:
break;
case FunctionDefinitionKind::Defaulted:
NewFD->setDefaulted();
break;
case FunctionDefinitionKind::Deleted:
NewFD->setDeletedAsWritten();
break;
}
if (isa<CXXMethodDecl>(NewFD) && DC == CurContext &&
D.isFunctionDefinition() && !isInline) {
// Pre C++20 [class.mfct]p2:
// A member function may be defined (8.4) in its class definition, in
// which case it is an inline member function (7.1.2)
// Post C++20 [class.mfct]p1:
// If a member function is attached to the global module and is defined
// in its class definition, it is inline.
NewFD->setImplicitlyInline(ImplicitInlineCXX20);
}
if (SC == SC_Static && isa<CXXMethodDecl>(NewFD) &&
!CurContext->isRecord()) {
// C++ [class.static]p1:
// A data or function member of a class may be declared static
// in a class definition, in which case it is a static member of
// the class.
// Complain about the 'static' specifier if it's on an out-of-line
// member function definition.
// MSVC permits the use of a 'static' storage specifier on an out-of-line
// member function template declaration and class member template
// declaration (MSVC versions before 2015); warn about this.
Diag(D.getDeclSpec().getStorageClassSpecLoc(),
((!getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
cast<CXXRecordDecl>(DC)->getDescribedClassTemplate()) ||
(getLangOpts().MSVCCompat && NewFD->getDescribedFunctionTemplate()))
? diag::ext_static_out_of_line : diag::err_static_out_of_line)
<< FixItHint::CreateRemoval(D.getDeclSpec().getStorageClassSpecLoc());
}
// C++11 [except.spec]p15:
// A deallocation function with no exception-specification is treated
// as if it were specified with noexcept(true).
const FunctionProtoType *FPT = R->getAs<FunctionProtoType>();
if ((Name.getCXXOverloadedOperator() == OO_Delete ||
Name.getCXXOverloadedOperator() == OO_Array_Delete) &&
getLangOpts().CPlusPlus11 && FPT && !FPT->hasExceptionSpec())
NewFD->setType(Context.getFunctionType(
FPT->getReturnType(), FPT->getParamTypes(),
FPT->getExtProtoInfo().withExceptionSpec(EST_BasicNoexcept)));
// C++20 [dcl.inline]/7
// If an inline function or variable that is attached to a named module
// is declared in a definition domain, it shall be defined in that
// domain.
// So, if the current declaration does not have a definition, we must
// check at the end of the TU (or when the PMF starts) to see that we
// have a definition at that point.
if (isInline && !D.isFunctionDefinition() && getLangOpts().CPlusPlus20 &&
NewFD->hasOwningModule() &&
NewFD->getOwningModule()->isModulePurview()) {
PendingInlineFuncDecls.insert(NewFD);
}
}
// Filter out previous declarations that don't match the scope.
FilterLookupForScope(Previous, OriginalDC, S, shouldConsiderLinkage(NewFD),
D.getCXXScopeSpec().isNotEmpty() ||
isMemberSpecialization ||
isFunctionTemplateSpecialization);
// Handle GNU asm-label extension (encoded as an attribute).
if (Expr *E = (Expr*) D.getAsmLabel()) {
// The parser guarantees this is a string.
StringLiteral *SE = cast<StringLiteral>(E);
NewFD->addAttr(AsmLabelAttr::Create(Context, SE->getString(),
/*IsLiteralLabel=*/true,
SE->getStrTokenLoc(0)));
} else if (!ExtnameUndeclaredIdentifiers.empty()) {
llvm::DenseMap<IdentifierInfo*,AsmLabelAttr*>::iterator I =
ExtnameUndeclaredIdentifiers.find(NewFD->getIdentifier());
if (I != ExtnameUndeclaredIdentifiers.end()) {
if (isDeclExternC(NewFD)) {
NewFD->addAttr(I->second);
ExtnameUndeclaredIdentifiers.erase(I);
} else
Diag(NewFD->getLocation(), diag::warn_redefine_extname_not_applied)
<< /*Variable*/0 << NewFD;
}
}
// Copy the parameter declarations from the declarator D to the function
// declaration NewFD, if they are available. First scavenge them into Params.
SmallVector<ParmVarDecl*, 16> Params;
unsigned FTIIdx;
if (D.isFunctionDeclarator(FTIIdx)) {
DeclaratorChunk::FunctionTypeInfo &FTI = D.getTypeObject(FTIIdx).Fun;
// Check for C99 6.7.5.3p10 - foo(void) is a non-varargs
// function that takes no arguments, not a function that takes a
// single void argument.
// We let through "const void" here because Sema::GetTypeForDeclarator
// already checks for that case.
if (FTIHasNonVoidParameters(FTI) && FTI.Params[0].Param) {
for (unsigned i = 0, e = FTI.NumParams; i != e; ++i) {
ParmVarDecl *Param = cast<ParmVarDecl>(FTI.Params[i].Param);
assert(Param->getDeclContext() != NewFD && "Was set before ?");
Param->setDeclContext(NewFD);
Params.push_back(Param);
if (Param->isInvalidDecl())
NewFD->setInvalidDecl();
}
}
if (!getLangOpts().CPlusPlus) {
// In C, find all the tag declarations from the prototype and move them
// into the function DeclContext. Remove them from the surrounding tag
// injection context of the function, which is typically but not always
// the TU.
DeclContext *PrototypeTagContext =
getTagInjectionContext(NewFD->getLexicalDeclContext());
for (NamedDecl *NonParmDecl : FTI.getDeclsInPrototype()) {
auto *TD = dyn_cast<TagDecl>(NonParmDecl);
// We don't want to reparent enumerators. Look at their parent enum
// instead.
if (!TD) {
if (auto *ECD = dyn_cast<EnumConstantDecl>(NonParmDecl))
TD = cast<EnumDecl>(ECD->getDeclContext());
}
if (!TD)
continue;
DeclContext *TagDC = TD->getLexicalDeclContext();
if (!TagDC->containsDecl(TD))
continue;
TagDC->removeDecl(TD);
TD->setDeclContext(NewFD);
NewFD->addDecl(TD);
// Preserve the lexical DeclContext if it is not the surrounding tag
// injection context of the FD. In this example, the semantic context of
// E will be f and the lexical context will be S, while both the
// semantic and lexical contexts of S will be f:
// void f(struct S { enum E { a } f; } s);
if (TagDC != PrototypeTagContext)
TD->setLexicalDeclContext(TagDC);
}
}
} else if (const FunctionProtoType *FT = R->getAs<FunctionProtoType>()) {
// When we're declaring a function with a typedef, typeof, etc as in the
// following example, we'll need to synthesize (unnamed)
// parameters for use in the declaration.
//
// @code
// typedef void fn(int);
// fn f;
// @endcode
// Synthesize a parameter for each argument type.
for (const auto &AI : FT->param_types()) {
ParmVarDecl *Param =
BuildParmVarDeclForTypedef(NewFD, D.getIdentifierLoc(), AI);
Param->setScopeInfo(0, Params.size());
Params.push_back(Param);
}
} else {
assert(R->isFunctionNoProtoType() && NewFD->getNumParams() == 0 &&
"Should not need args for typedef of non-prototype fn");
}
// Finally, we know we have the right number of parameters, install them.
NewFD->setParams(Params);
if (D.getDeclSpec().isNoreturnSpecified())
NewFD->addAttr(C11NoReturnAttr::Create(Context,
D.getDeclSpec().getNoreturnSpecLoc(),
AttributeCommonInfo::AS_Keyword));
// Functions returning a variably modified type violate C99 6.7.5.2p2
// because all functions have linkage.
if (!NewFD->isInvalidDecl() &&
NewFD->getReturnType()->isVariablyModifiedType()) {
Diag(NewFD->getLocation(), diag::err_vm_func_decl);
NewFD->setInvalidDecl();
}
// Apply an implicit SectionAttr if '#pragma clang section text' is active
if (PragmaClangTextSection.Valid && D.isFunctionDefinition() &&
!NewFD->hasAttr<SectionAttr>())
NewFD->addAttr(PragmaClangTextSectionAttr::CreateImplicit(
Context, PragmaClangTextSection.SectionName,
PragmaClangTextSection.PragmaLocation, AttributeCommonInfo::AS_Pragma));
// Apply an implicit SectionAttr if #pragma code_seg is active.
if (CodeSegStack.CurrentValue && D.isFunctionDefinition() &&
!NewFD->hasAttr<SectionAttr>()) {
NewFD->addAttr(SectionAttr::CreateImplicit(
Context, CodeSegStack.CurrentValue->getString(),
CodeSegStack.CurrentPragmaLocation, AttributeCommonInfo::AS_Pragma,
SectionAttr::Declspec_allocate));
if (UnifySection(CodeSegStack.CurrentValue->getString(),
ASTContext::PSF_Implicit | ASTContext::PSF_Execute |
ASTContext::PSF_Read,
NewFD))
NewFD->dropAttr<SectionAttr>();
}
// Apply an implicit StrictGuardStackCheckAttr if #pragma strict_gs_check is
// active.
if (StrictGuardStackCheckStack.CurrentValue && D.isFunctionDefinition() &&
!NewFD->hasAttr<StrictGuardStackCheckAttr>())
NewFD->addAttr(StrictGuardStackCheckAttr::CreateImplicit(
Context, PragmaClangTextSection.PragmaLocation,
AttributeCommonInfo::AS_Pragma));
// Apply an implicit CodeSegAttr from class declspec or
// apply an implicit SectionAttr from #pragma code_seg if active.
if (!NewFD->hasAttr<CodeSegAttr>()) {
if (Attr *SAttr = getImplicitCodeSegOrSectionAttrForFunction(NewFD,
D.isFunctionDefinition())) {
NewFD->addAttr(SAttr);
}
}
// Handle attributes.
ProcessDeclAttributes(S, NewFD, D);
const auto *NewTVA = NewFD->getAttr<TargetVersionAttr>();
if (NewTVA && !NewTVA->isDefaultVersion() &&
!Context.getTargetInfo().hasFeature("fmv")) {
// Don't add FMV function declarations to the scope if FMV is disabled.
AddToScope = false;
return NewFD;
}
if (getLangOpts().OpenCL) {
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
// type declaration will generate a compilation error.
LangAS AddressSpace = NewFD->getReturnType().getAddressSpace();
if (AddressSpace != LangAS::Default) {
Diag(NewFD->getLocation(), diag::err_return_value_with_address_space);
NewFD->setInvalidDecl();
}
}
if (getLangOpts().HLSL) {
auto &TargetInfo = getASTContext().getTargetInfo();
// Skip operator overloads, whose names are not identifiers.
// Also make sure NewFD is in translation-unit scope.
if (!NewFD->isInvalidDecl() && Name.isIdentifier() &&
NewFD->getName() == TargetInfo.getTargetOpts().HLSLEntry &&
S->getDepth() == 0) {
CheckHLSLEntryPoint(NewFD);
if (!NewFD->isInvalidDecl()) {
auto Env = TargetInfo.getTriple().getEnvironment();
AttributeCommonInfo AL(NewFD->getBeginLoc());
HLSLShaderAttr::ShaderType ShaderType =
static_cast<HLSLShaderAttr::ShaderType>(
hlsl::getStageFromEnvironment(Env));
// To share code with HLSLShaderAttr, add HLSLShaderAttr to entry
// function.
if (HLSLShaderAttr *Attr = mergeHLSLShaderAttr(NewFD, AL, ShaderType))
NewFD->addAttr(Attr);
}
}
// HLSL does not support specifying an address space on a function return
// type.
LangAS AddressSpace = NewFD->getReturnType().getAddressSpace();
if (AddressSpace != LangAS::Default) {
Diag(NewFD->getLocation(), diag::err_return_value_with_address_space);
NewFD->setInvalidDecl();
}
}
if (!getLangOpts().CPlusPlus) {
// Perform semantic checking on the function declaration.
if (!NewFD->isInvalidDecl() && NewFD->isMain())
CheckMain(NewFD, D.getDeclSpec());
if (!NewFD->isInvalidDecl() && NewFD->isMSVCRTEntryPoint())
CheckMSVCRTEntryPoint(NewFD);
if (!NewFD->isInvalidDecl())
D.setRedeclaration(CheckFunctionDeclaration(S, NewFD, Previous,
isMemberSpecialization,
D.isFunctionDefinition()));
else if (!Previous.empty())
// Recover gracefully from an invalid redeclaration.
D.setRedeclaration(true);
assert((NewFD->isInvalidDecl() || !D.isRedeclaration() ||
Previous.getResultKind() != LookupResult::FoundOverloaded) &&
"previous declaration set still overloaded");
// Diagnose no-prototype function declarations with calling conventions that
// don't support variadic calls. Only do this in C and do it after merging
// possibly prototyped redeclarations.
const FunctionType *FT = NewFD->getType()->castAs<FunctionType>();
if (isa<FunctionNoProtoType>(FT) && !D.isFunctionDefinition()) {
CallingConv CC = FT->getExtInfo().getCC();
if (!supportsVariadicCall(CC)) {
// Windows system headers sometimes accidentally use stdcall without
// (void) parameters, so we relax this to a warning.
int DiagID =
CC == CC_X86StdCall ? diag::warn_cconv_knr : diag::err_cconv_knr;
Diag(NewFD->getLocation(), DiagID)
<< FunctionType::getNameForCallConv(CC);
}
}
if (NewFD->getReturnType().hasNonTrivialToPrimitiveDestructCUnion() ||
NewFD->getReturnType().hasNonTrivialToPrimitiveCopyCUnion())
checkNonTrivialCUnion(NewFD->getReturnType(),
NewFD->getReturnTypeSourceRange().getBegin(),
NTCUC_FunctionReturn, NTCUK_Destruct|NTCUK_Copy);
} else {
// C++11 [replacement.functions]p3:
// The program's definitions shall not be specified as inline.
//
// N.B. We diagnose declarations instead of definitions per LWG issue 2340.
//
// Suppress the diagnostic if the function is __attribute__((used)), since
// that forces an external definition to be emitted.
if (D.getDeclSpec().isInlineSpecified() &&
NewFD->isReplaceableGlobalAllocationFunction() &&
!NewFD->hasAttr<UsedAttr>())
Diag(D.getDeclSpec().getInlineSpecLoc(),
diag::ext_operator_new_delete_declared_inline)
<< NewFD->getDeclName();
// If the declarator is a template-id, translate the parser's template
// argument list into our AST format.
if (D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId) {
TemplateIdAnnotation *TemplateId = D.getName().TemplateId;
TemplateArgs.setLAngleLoc(TemplateId->LAngleLoc);
TemplateArgs.setRAngleLoc(TemplateId->RAngleLoc);
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
translateTemplateArguments(TemplateArgsPtr,
TemplateArgs);
HasExplicitTemplateArgs = true;
if (NewFD->isInvalidDecl()) {
HasExplicitTemplateArgs = false;
} else if (FunctionTemplate) {
// Function template with explicit template arguments.
Diag(D.getIdentifierLoc(), diag::err_function_template_partial_spec)
<< SourceRange(TemplateId->LAngleLoc, TemplateId->RAngleLoc);
HasExplicitTemplateArgs = false;
} else {
assert((isFunctionTemplateSpecialization ||
D.getDeclSpec().isFriendSpecified()) &&
"should have a 'template<>' for this decl");
// "friend void foo<>(int);" is an implicit specialization decl.
isFunctionTemplateSpecialization = true;
}
} else if (isFriend && isFunctionTemplateSpecialization) {
// This combination is only possible in a recovery case; the user
// wrote something like:
// template <> friend void foo(int);
// which we're recovering from as if the user had written:
// friend void foo<>(int);
// Go ahead and fake up a template id.
HasExplicitTemplateArgs = true;
TemplateArgs.setLAngleLoc(D.getIdentifierLoc());
TemplateArgs.setRAngleLoc(D.getIdentifierLoc());
}
// We do not add HD attributes to specializations here because
// they may have different constexpr-ness compared to their
// templates and, after maybeAddCUDAHostDeviceAttrs() is applied,
// may end up with different effective targets. Instead, a
// specialization inherits its target attributes from its template
// in the CheckFunctionTemplateSpecialization() call below.
if (getLangOpts().CUDA && !isFunctionTemplateSpecialization)
maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
// If it's a friend (and only if it's a friend), it's possible
// that either the specialized function type or the specialized
// template is dependent, and therefore matching will fail. In
// this case, don't check the specialization yet.
if (isFunctionTemplateSpecialization && isFriend &&
(NewFD->getType()->isDependentType() || DC->isDependentContext() ||
TemplateSpecializationType::anyInstantiationDependentTemplateArguments(
TemplateArgs.arguments()))) {
assert(HasExplicitTemplateArgs &&
"friend function specialization without template args");
if (CheckDependentFunctionTemplateSpecialization(NewFD, TemplateArgs,
Previous))
NewFD->setInvalidDecl();
} else if (isFunctionTemplateSpecialization) {
if (CurContext->isDependentContext() && CurContext->isRecord()
&& !isFriend) {
isDependentClassScopeExplicitSpecialization = true;
} else if (!NewFD->isInvalidDecl() &&
CheckFunctionTemplateSpecialization(
NewFD, (HasExplicitTemplateArgs ? &TemplateArgs : nullptr),
Previous))
NewFD->setInvalidDecl();
// C++ [dcl.stc]p1:
// A storage-class-specifier shall not be specified in an explicit
// specialization (14.7.3)
FunctionTemplateSpecializationInfo *Info =
NewFD->getTemplateSpecializationInfo();
if (Info && SC != SC_None) {
if (SC != Info->getTemplate()->getTemplatedDecl()->getStorageClass())
Diag(NewFD->getLocation(),
diag::err_explicit_specialization_inconsistent_storage_class)
<< SC
<< FixItHint::CreateRemoval(
D.getDeclSpec().getStorageClassSpecLoc());
else
Diag(NewFD->getLocation(),
diag::ext_explicit_specialization_storage_class)
<< FixItHint::CreateRemoval(
D.getDeclSpec().getStorageClassSpecLoc());
}
} else if (isMemberSpecialization && isa<CXXMethodDecl>(NewFD)) {
if (CheckMemberSpecialization(NewFD, Previous))
NewFD->setInvalidDecl();
}
// Perform semantic checking on the function declaration.
if (!isDependentClassScopeExplicitSpecialization) {
if (!NewFD->isInvalidDecl() && NewFD->isMain())
CheckMain(NewFD, D.getDeclSpec());
if (!NewFD->isInvalidDecl() && NewFD->isMSVCRTEntryPoint())
CheckMSVCRTEntryPoint(NewFD);
if (!NewFD->isInvalidDecl())
D.setRedeclaration(CheckFunctionDeclaration(S, NewFD, Previous,
isMemberSpecialization,
D.isFunctionDefinition()));
else if (!Previous.empty())
// Recover gracefully from an invalid redeclaration.
D.setRedeclaration(true);
}
assert((NewFD->isInvalidDecl() || NewFD->isMultiVersion() ||
!D.isRedeclaration() ||
Previous.getResultKind() != LookupResult::FoundOverloaded) &&
"previous declaration set still overloaded");
NamedDecl *PrincipalDecl = (FunctionTemplate
? cast<NamedDecl>(FunctionTemplate)
: NewFD);
if (isFriend && NewFD->getPreviousDecl()) {
AccessSpecifier Access = AS_public;
if (!NewFD->isInvalidDecl())
Access = NewFD->getPreviousDecl()->getAccess();
NewFD->setAccess(Access);
if (FunctionTemplate) FunctionTemplate->setAccess(Access);
}
if (NewFD->isOverloadedOperator() && !DC->isRecord() &&
PrincipalDecl->isInIdentifierNamespace(Decl::IDNS_Ordinary))
PrincipalDecl->setNonMemberOperator();
// If we have a function template, check the template parameter
// list. This will check and merge default template arguments.
if (FunctionTemplate) {
FunctionTemplateDecl *PrevTemplate =
FunctionTemplate->getPreviousDecl();
CheckTemplateParameterList(FunctionTemplate->getTemplateParameters(),
PrevTemplate ? PrevTemplate->getTemplateParameters()
: nullptr,
D.getDeclSpec().isFriendSpecified()
? (D.isFunctionDefinition()
? TPC_FriendFunctionTemplateDefinition
: TPC_FriendFunctionTemplate)
: (D.getCXXScopeSpec().isSet() &&
DC && DC->isRecord() &&
DC->isDependentContext())
? TPC_ClassTemplateMember
: TPC_FunctionTemplate);
}
if (NewFD->isInvalidDecl()) {
// Ignore all the rest of this.
} else if (!D.isRedeclaration()) {
struct ActOnFDArgs ExtraArgs = { S, D, TemplateParamLists,
AddToScope };
// Fake up an access specifier if it's supposed to be a class member.
if (isa<CXXRecordDecl>(NewFD->getDeclContext()))
NewFD->setAccess(AS_public);
// Qualified decls generally require a previous declaration.
if (D.getCXXScopeSpec().isSet()) {
// ...with the major exception of templated-scope or
// dependent-scope friend declarations.
// TODO: we currently also suppress this check in dependent
// contexts because (1) the parameter depth will be off when
// matching friend templates and (2) we might actually be
// selecting a friend based on a dependent factor. But there
// are situations where these conditions don't apply and we
// can actually do this check immediately.
//
// Unless the scope is dependent, it's always an error if qualified
// redeclaration lookup found nothing at all. Diagnose that now;
// nothing will diagnose that error later.
if (isFriend &&
(D.getCXXScopeSpec().getScopeRep()->isDependent() ||
(!Previous.empty() && CurContext->isDependentContext()))) {
// ignore these
} else if (NewFD->isCPUDispatchMultiVersion() ||
NewFD->isCPUSpecificMultiVersion()) {
// ignore this, we allow the redeclaration behavior here to create new
// versions of the function.
} else {
// The user tried to provide an out-of-line definition for a
// function that is a member of a class or namespace, but there
// was no such member function declared (C++ [class.mfct]p2,
// C++ [namespace.memdef]p2). For example:
//
// class X {
// void f() const;
// };
//
// void X::f() { } // ill-formed
//
// Complain about this problem, and attempt to suggest close
// matches (e.g., those that differ only in cv-qualifiers and
// whether the parameter types are references).
if (NamedDecl *Result = DiagnoseInvalidRedeclaration(
*this, Previous, NewFD, ExtraArgs, false, nullptr)) {
AddToScope = ExtraArgs.AddToScope;
return Result;
}
}
// Unqualified local friend declarations are required to resolve
// to something.
} else if (isFriend && cast<CXXRecordDecl>(CurContext)->isLocalClass()) {
if (NamedDecl *Result = DiagnoseInvalidRedeclaration(
*this, Previous, NewFD, ExtraArgs, true, S)) {
AddToScope = ExtraArgs.AddToScope;
return Result;
}
}
} else if (!D.isFunctionDefinition() &&
isa<CXXMethodDecl>(NewFD) && NewFD->isOutOfLine() &&
!isFriend && !isFunctionTemplateSpecialization &&
!isMemberSpecialization) {
// An out-of-line member function declaration must also be a
// definition (C++ [class.mfct]p2).
// Note that this is not the case for explicit specializations of
// function templates or member functions of class templates, per
// C++ [temp.expl.spec]p2. We also allow these declarations as an
// extension for compatibility with old SWIG code which likes to
// generate them.
Diag(NewFD->getLocation(), diag::ext_out_of_line_declaration)
<< D.getCXXScopeSpec().getRange();
}
}
// If this is the first declaration of a library builtin function, add
// attributes as appropriate.
if (!D.isRedeclaration()) {
if (IdentifierInfo *II = Previous.getLookupName().getAsIdentifierInfo()) {
if (unsigned BuiltinID = II->getBuiltinID()) {
bool InStdNamespace = Context.BuiltinInfo.isInStdNamespace(BuiltinID);
if (!InStdNamespace &&
NewFD->getDeclContext()->getRedeclContext()->isFileContext()) {
if (NewFD->getLanguageLinkage() == CLanguageLinkage) {
// Validate the type matches unless this builtin is specified as
// matching regardless of its declared type.
if (Context.BuiltinInfo.allowTypeMismatch(BuiltinID)) {
NewFD->addAttr(BuiltinAttr::CreateImplicit(Context, BuiltinID));
} else {
ASTContext::GetBuiltinTypeError Error;
LookupNecessaryTypesForBuiltin(S, BuiltinID);
QualType BuiltinType = Context.GetBuiltinType(BuiltinID, Error);
if (!Error && !BuiltinType.isNull() &&
Context.hasSameFunctionTypeIgnoringExceptionSpec(
NewFD->getType(), BuiltinType))
NewFD->addAttr(BuiltinAttr::CreateImplicit(Context, BuiltinID));
}
}
} else if (InStdNamespace && NewFD->isInStdNamespace() &&
isStdBuiltin(Context, NewFD, BuiltinID)) {
NewFD->addAttr(BuiltinAttr::CreateImplicit(Context, BuiltinID));
}
}
}
}
ProcessPragmaWeak(S, NewFD);
checkAttributesAfterMerging(*this, *NewFD);
AddKnownFunctionAttributes(NewFD);
if (NewFD->hasAttr<OverloadableAttr>() &&
!NewFD->getType()->getAs<FunctionProtoType>()) {
Diag(NewFD->getLocation(),
diag::err_attribute_overloadable_no_prototype)
<< NewFD;
NewFD->dropAttr<OverloadableAttr>();
}
// If there's a #pragma GCC visibility in scope, and this isn't a class
// member, set the visibility of this function.
if (!DC->isRecord() && NewFD->isExternallyVisible())
AddPushedVisibilityAttribute(NewFD);
// If there's a #pragma clang arc_cf_code_audited in scope, consider
// marking the function.
AddCFAuditedAttribute(NewFD);
// If this is a function definition, check if we have to apply any
// attributes (i.e. optnone and no_builtin) due to a pragma.
if (D.isFunctionDefinition()) {
AddRangeBasedOptnone(NewFD);
AddImplicitMSFunctionNoBuiltinAttr(NewFD);
AddSectionMSAllocText(NewFD);
ModifyFnAttributesMSPragmaOptimize(NewFD);
}
// If this is the first declaration of an extern C variable, update
// the map of such variables.
if (NewFD->isFirstDecl() && !NewFD->isInvalidDecl() &&
isIncompleteDeclExternC(*this, NewFD))
RegisterLocallyScopedExternCDecl(NewFD, S);
// Set this FunctionDecl's range up to the right paren.
NewFD->setRangeEnd(D.getSourceRange().getEnd());
if (D.isRedeclaration() && !Previous.empty()) {
NamedDecl *Prev = Previous.getRepresentativeDecl();
checkDLLAttributeRedeclaration(*this, Prev, NewFD,
isMemberSpecialization ||
isFunctionTemplateSpecialization,
D.isFunctionDefinition());
}
if (getLangOpts().CUDA) {
IdentifierInfo *II = NewFD->getIdentifier();
if (II && II->isStr(getCudaConfigureFuncName()) &&
!NewFD->isInvalidDecl() &&
NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
if (!R->castAs<FunctionType>()->getReturnType()->isScalarType())
Diag(NewFD->getLocation(), diag::err_config_scalar_return)
<< getCudaConfigureFuncName();
Context.setcudaConfigureCallDecl(NewFD);
}
// Variadic functions, other than a *declaration* of printf, are not allowed
// in device-side CUDA code, unless someone passed
// -fcuda-allow-variadic-functions.
if (!getLangOpts().CUDAAllowVariadicFunctions && NewFD->isVariadic() &&
(NewFD->hasAttr<CUDADeviceAttr>() ||
NewFD->hasAttr<CUDAGlobalAttr>()) &&
!(II && II->isStr("printf") && NewFD->isExternC() &&
!D.isFunctionDefinition())) {
Diag(NewFD->getLocation(), diag::err_variadic_device_fn);
}
}
MarkUnusedFileScopedDecl(NewFD);
if (getLangOpts().OpenCL && NewFD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL v1.2 s6.8 static is invalid for kernel functions.
if (SC == SC_Static) {
Diag(D.getIdentifierLoc(), diag::err_static_kernel);
D.setInvalidType();
}
// OpenCL v1.2, s6.9 -- Kernels can only have return type void.
if (!NewFD->getReturnType()->isVoidType()) {
SourceRange RTRange = NewFD->getReturnTypeSourceRange();
Diag(D.getIdentifierLoc(), diag::err_expected_kernel_void_return_type)
<< (RTRange.isValid() ? FixItHint::CreateReplacement(RTRange, "void")
: FixItHint());
D.setInvalidType();
}
llvm::SmallPtrSet<const Type *, 16> ValidTypes;
for (auto *Param : NewFD->parameters())
checkIsValidOpenCLKernelParameter(*this, D, Param, ValidTypes);
if (getLangOpts().OpenCLCPlusPlus) {
if (DC->isRecord()) {
Diag(D.getIdentifierLoc(), diag::err_method_kernel);
D.setInvalidType();
}
if (FunctionTemplate) {
Diag(D.getIdentifierLoc(), diag::err_template_kernel);
D.setInvalidType();
}
}
}
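// Illustrative sketch (not from this file): the OpenCL checks above reject
// kernels such as
//   static kernel void k(global int *p) { }    // error: static kernel
//   kernel int k(global int *p) { return 0; }  // error: kernel must return void
// and, in C++ for OpenCL, kernels that are member functions or templates.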
if (getLangOpts().CPlusPlus) {
// Precalculate whether this is a friend function template with a constraint
// that depends on an enclosing template, per [temp.friend]p9.
if (isFriend && FunctionTemplate &&
FriendConstraintsDependOnEnclosingTemplate(NewFD))
NewFD->setFriendConstraintRefersToEnclosingTemplate(true);
if (FunctionTemplate) {
if (NewFD->isInvalidDecl())
FunctionTemplate->setInvalidDecl();
return FunctionTemplate;
}
if (isMemberSpecialization && !NewFD->isInvalidDecl())
CompleteMemberSpecialization(NewFD, Previous);
}
for (const ParmVarDecl *Param : NewFD->parameters()) {
QualType PT = Param->getType();
// OpenCL 2.0 pipe restrictions forbid pipe packet types from being non-value
// types.
if (getLangOpts().getOpenCLCompatibleVersion() >= 200) {
if (const PipeType *PipeTy = PT->getAs<PipeType>()) {
QualType ElemTy = PipeTy->getElementType();
if (ElemTy->isReferenceType() || ElemTy->isPointerType()) {
Diag(Param->getTypeSpecStartLoc(), diag::err_reference_pipe_type);
D.setInvalidType();
}
}
}
}
// Here we have a function template explicit specialization at class scope.
// The actual specialization will be postponed to template instantiation
// time via the ClassScopeFunctionSpecializationDecl node.
if (isDependentClassScopeExplicitSpecialization) {
ClassScopeFunctionSpecializationDecl *NewSpec =
ClassScopeFunctionSpecializationDecl::Create(
Context, CurContext, NewFD->getLocation(),
cast<CXXMethodDecl>(NewFD),
HasExplicitTemplateArgs, TemplateArgs);
CurContext->addDecl(NewSpec);
AddToScope = false;
}
// Diagnose availability attributes. Availability cannot be used on functions
// that are run during load/unload.
if (const auto *attr = NewFD->getAttr<AvailabilityAttr>()) {
if (NewFD->hasAttr<ConstructorAttr>()) {
Diag(attr->getLocation(), diag::warn_availability_on_static_initializer)
<< 1;
NewFD->dropAttr<AvailabilityAttr>();
}
if (NewFD->hasAttr<DestructorAttr>()) {
Diag(attr->getLocation(), diag::warn_availability_on_static_initializer)
<< 2;
NewFD->dropAttr<AvailabilityAttr>();
}
}
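// Illustrative sketch (hypothetical declaration, not from this file): the
// warning above fires on combinations such as
//   __attribute__((constructor, availability(macos, introduced=12)))
//   void init();
// because availability cannot be enforced for functions that run during
// image load/unload.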
// Diagnose the no_builtin attribute on function declarations that are not a
// definition.
// FIXME: We should really be doing this in
// SemaDeclAttr.cpp::handleNoBuiltinAttr; unfortunately we only have access to
// the FunctionDecl there, and at this point of the code
// FunctionDecl::isThisDeclarationADefinition() always returns `false`
// because Sema::ActOnStartOfFunctionDef has not been called yet.
if (const auto *NBA = NewFD->getAttr<NoBuiltinAttr>())
switch (D.getFunctionDefinitionKind()) {
case FunctionDefinitionKind::Defaulted:
case FunctionDefinitionKind::Deleted:
Diag(NBA->getLocation(),
diag::err_attribute_no_builtin_on_defaulted_deleted_function)
<< NBA->getSpelling();
break;
case FunctionDefinitionKind::Declaration:
Diag(NBA->getLocation(), diag::err_attribute_no_builtin_on_non_definition)
<< NBA->getSpelling();
break;
case FunctionDefinitionKind::Definition:
break;
}
return NewFD;
}
/// Return a CodeSegAttr from a containing class. The Microsoft docs say
/// when __declspec(code_seg) "is applied to a class, all member functions of
/// the class and nested classes -- this includes compiler-generated special
/// member functions -- are put in the specified segment."
/// The actual behavior is a little more complicated. The Microsoft compiler
/// won't check outer classes if there is an active value from #pragma code_seg.
/// The CodeSeg is always applied from the direct parent, but only from outer
/// classes when the #pragma code_seg stack is empty. See
/// https://reviews.llvm.org/D22931; the Microsoft feedback page is no longer
/// available since MS has removed it.
static Attr *getImplicitCodeSegAttrFromClass(Sema &S, const FunctionDecl *FD) {
const auto *Method = dyn_cast<CXXMethodDecl>(FD);
if (!Method)
return nullptr;
const CXXRecordDecl *Parent = Method->getParent();
if (const auto *SAttr = Parent->getAttr<CodeSegAttr>()) {
Attr *NewAttr = SAttr->clone(S.getASTContext());
NewAttr->setImplicit(true);
return NewAttr;
}
// The Microsoft compiler won't check outer classes for the CodeSeg
// when the #pragma code_seg stack is active.
if (S.CodeSegStack.CurrentValue)
return nullptr;
while ((Parent = dyn_cast<CXXRecordDecl>(Parent->getParent()))) {
if (const auto *SAttr = Parent->getAttr<CodeSegAttr>()) {
Attr *NewAttr = SAttr->clone(S.getASTContext());
NewAttr->setImplicit(true);
return NewAttr;
}
}
return nullptr;
}
/// Returns an implicit CodeSegAttr if a __declspec(code_seg) is found on a
/// containing class. Otherwise it will return an implicit SectionAttr if the
/// function is a definition and there is an active value on CodeSegStack
/// (from the current #pragma code_seg value).
///
/// \param FD Function being declared.
/// \param IsDefinition Whether it is a definition or just a declaration.
/// \returns A CodeSegAttr or SectionAttr to apply to the function or
/// nullptr if no attribute should be added.
Attr *Sema::getImplicitCodeSegOrSectionAttrForFunction(const FunctionDecl *FD,
bool IsDefinition) {
if (Attr *A = getImplicitCodeSegAttrFromClass(*this, FD))
return A;
if (!FD->hasAttr<SectionAttr>() && IsDefinition &&
CodeSegStack.CurrentValue)
return SectionAttr::CreateImplicit(
getASTContext(), CodeSegStack.CurrentValue->getString(),
CodeSegStack.CurrentPragmaLocation, AttributeCommonInfo::AS_Pragma,
SectionAttr::Declspec_allocate);
return nullptr;
}
/// Determines if we can perform a correct type check for \p NewD as a
/// redeclaration of \p OldD. If not, we can generally still perform a
/// best-effort check.
///
/// \param NewD The new declaration.
/// \param OldD The old declaration.
/// \param NewT The portion of the type of the new declaration to check.
/// \param OldT The portion of the type of the old declaration to check.
bool Sema::canFullyTypeCheckRedeclaration(ValueDecl *NewD, ValueDecl *OldD,
QualType NewT, QualType OldT) {
if (!NewD->getLexicalDeclContext()->isDependentContext())
return true;
// For dependently-typed local extern declarations and friends, we can't
// perform a correct type check in general until instantiation:
//
// int f();
// template<typename T> void g() { T f(); }
//
// (valid if g() is only instantiated with T = int).
if (NewT->isDependentType() &&
(NewD->isLocalExternDecl() || NewD->getFriendObjectKind()))
return false;
// Similarly, if the previous declaration was a dependent local extern
// declaration, we don't really know its type yet.
if (OldT->isDependentType() && OldD->isLocalExternDecl())
return false;
return true;
}
/// Checks if the new declaration declared in dependent context must be
/// put in the same redeclaration chain as the specified declaration.
///
/// \param D Declaration that is checked.
/// \param PrevDecl Previous declaration found with proper lookup method for the
/// same declaration name.
/// \returns True if D must be added to the redeclaration chain which PrevDecl
/// belongs to.
///
bool Sema::shouldLinkDependentDeclWithPrevious(Decl *D, Decl *PrevDecl) {
if (!D->getLexicalDeclContext()->isDependentContext())
return true;
// Don't chain dependent friend function definitions until instantiation, to
// permit cases like
//
// void func();
// template<typename T> class C1 { friend void func() {} };
// template<typename T> class C2 { friend void func() {} };
//
// ... which is valid if only one of C1 and C2 is ever instantiated.
//
// FIXME: This need only apply to function definitions. For now, we proxy
// this by checking for a file-scope function. We do not want this to apply
// to friend declarations nominating member functions, because that gets in
// the way of access checks.
if (D->getFriendObjectKind() && D->getDeclContext()->isFileContext())
return false;
auto *VD = dyn_cast<ValueDecl>(D);
auto *PrevVD = dyn_cast<ValueDecl>(PrevDecl);
return !VD || !PrevVD ||
canFullyTypeCheckRedeclaration(VD, PrevVD, VD->getType(),
PrevVD->getType());
}
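// Illustrative sketch (not from this file): a target-based multiversion set
// that the following helpers validate looks roughly like
//   __attribute__((target("avx2")))    int foo(); // specialized version
//   __attribute__((target("default"))) int foo(); // default version
// CheckMultiVersionValue rejects unknown CPU/feature names and negated
// features such as "no-avx2" in those attribute strings.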
/// Check the target or target_version attribute of the function for
/// MultiVersion validity.
///
/// Returns true if there was an error, false otherwise.
static bool CheckMultiVersionValue(Sema &S, const FunctionDecl *FD) {
const auto *TA = FD->getAttr<TargetAttr>();
const auto *TVA = FD->getAttr<TargetVersionAttr>();
assert(
(TA || TVA) &&
"MultiVersion candidate requires a target or target_version attribute");
const TargetInfo &TargetInfo = S.Context.getTargetInfo();
enum ErrType { Feature = 0, Architecture = 1 };
if (TA) {
ParsedTargetAttr ParseInfo =
S.getASTContext().getTargetInfo().parseTargetAttr(TA->getFeaturesStr());
if (!ParseInfo.CPU.empty() && !TargetInfo.validateCpuIs(ParseInfo.CPU)) {
S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
<< Architecture << ParseInfo.CPU;
return true;
}
for (const auto &Feat : ParseInfo.Features) {
auto BareFeat = StringRef{Feat}.substr(1);
if (Feat[0] == '-') {
S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
<< Feature << ("no-" + BareFeat).str();
return true;
}
if (!TargetInfo.validateCpuSupports(BareFeat) ||
!TargetInfo.isValidFeatureName(BareFeat)) {
S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
<< Feature << BareFeat;
return true;
}
}
}
if (TVA) {
llvm::SmallVector<StringRef, 8> Feats;
TVA->getFeatures(Feats);
for (const auto &Feat : Feats) {
if (!TargetInfo.validateCpuSupports(Feat)) {
S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
<< Feature << Feat;
return true;
}
}
}
return false;
}
// Provide a white-list of attributes that are allowed to be combined with
// multiversion functions.
static bool AttrCompatibleWithMultiVersion(attr::Kind Kind,
MultiVersionKind MVKind) {
// Note: this list/diagnosis must match the list in
// checkMultiversionAttributesAllSame.
switch (Kind) {
default:
return false;
case attr::Used:
return MVKind == MultiVersionKind::Target;
case attr::NonNull:
case attr::NoThrow:
return true;
}
}
static bool checkNonMultiVersionCompatAttributes(Sema &S,
const FunctionDecl *FD,
const FunctionDecl *CausedFD,
MultiVersionKind MVKind) {
const auto Diagnose = [FD, CausedFD, MVKind](Sema &S, const Attr *A) {
S.Diag(FD->getLocation(), diag::err_multiversion_disallowed_other_attr)
<< static_cast<unsigned>(MVKind) << A;
if (CausedFD)
S.Diag(CausedFD->getLocation(), diag::note_multiversioning_caused_here);
return true;
};
for (const Attr *A : FD->attrs()) {
switch (A->getKind()) {
case attr::CPUDispatch:
case attr::CPUSpecific:
if (MVKind != MultiVersionKind::CPUDispatch &&
MVKind != MultiVersionKind::CPUSpecific)
return Diagnose(S, A);
break;
case attr::Target:
if (MVKind != MultiVersionKind::Target)
return Diagnose(S, A);
break;
case attr::TargetVersion:
if (MVKind != MultiVersionKind::TargetVersion)
return Diagnose(S, A);
break;
case attr::TargetClones:
if (MVKind != MultiVersionKind::TargetClones)
return Diagnose(S, A);
break;
default:
if (!AttrCompatibleWithMultiVersion(A->getKind(), MVKind))
return Diagnose(S, A);
break;
}
}
return false;
}
bool Sema::areMultiversionVariantFunctionsCompatible(
const FunctionDecl *OldFD, const FunctionDecl *NewFD,
const PartialDiagnostic &NoProtoDiagID,
const PartialDiagnosticAt &NoteCausedDiagIDAt,
const PartialDiagnosticAt &NoSupportDiagIDAt,
const PartialDiagnosticAt &DiffDiagIDAt, bool TemplatesSupported,
bool ConstexprSupported, bool CLinkageMayDiffer) {
enum DoesntSupport {
FuncTemplates = 0,
VirtFuncs = 1,
DeducedReturn = 2,
Constructors = 3,
Destructors = 4,
DeletedFuncs = 5,
DefaultedFuncs = 6,
ConstexprFuncs = 7,
ConstevalFuncs = 8,
Lambda = 9,
};
enum Different {
CallingConv = 0,
ReturnType = 1,
ConstexprSpec = 2,
InlineSpec = 3,
Linkage = 4,
LanguageLinkage = 5,
};
if (NoProtoDiagID.getDiagID() != 0 && OldFD &&
!OldFD->getType()->getAs<FunctionProtoType>()) {
Diag(OldFD->getLocation(), NoProtoDiagID);
Diag(NoteCausedDiagIDAt.first, NoteCausedDiagIDAt.second);
return true;
}
if (NoProtoDiagID.getDiagID() != 0 &&
!NewFD->getType()->getAs<FunctionProtoType>())
return Diag(NewFD->getLocation(), NoProtoDiagID);
if (!TemplatesSupported &&
NewFD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second)
<< FuncTemplates;
if (const auto *NewCXXFD = dyn_cast<CXXMethodDecl>(NewFD)) {
if (NewCXXFD->isVirtual())
return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second)
<< VirtFuncs;
if (isa<CXXConstructorDecl>(NewCXXFD))
return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second)
<< Constructors;
if (isa<CXXDestructorDecl>(NewCXXFD))
return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second)
<< Destructors;
}
if (NewFD->isDeleted())
return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second)
<< DeletedFuncs;
if (NewFD->isDefaulted())
return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second)
<< DefaultedFuncs;
if (!ConstexprSupported && NewFD->isConstexpr())
return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second)
<< (NewFD->isConsteval() ? ConstevalFuncs : ConstexprFuncs);
QualType NewQType = Context.getCanonicalType(NewFD->getType());
const auto *NewType = cast<FunctionType>(NewQType);
QualType NewReturnType = NewType->getReturnType();
if (NewReturnType->isUndeducedType())
return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second)
<< DeducedReturn;
// Ensure the return type is identical.
if (OldFD) {
QualType OldQType = Context.getCanonicalType(OldFD->getType());
const auto *OldType = cast<FunctionType>(OldQType);
FunctionType::ExtInfo OldTypeInfo = OldType->getExtInfo();
FunctionType::ExtInfo NewTypeInfo = NewType->getExtInfo();
if (OldTypeInfo.getCC() != NewTypeInfo.getCC())
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << CallingConv;
QualType OldReturnType = OldType->getReturnType();
if (OldReturnType != NewReturnType)
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << ReturnType;
if (OldFD->getConstexprKind() != NewFD->getConstexprKind())
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << ConstexprSpec;
if (OldFD->isInlineSpecified() != NewFD->isInlineSpecified())
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << InlineSpec;
if (OldFD->getFormalLinkage() != NewFD->getFormalLinkage())
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << Linkage;
if (!CLinkageMayDiffer && OldFD->isExternC() != NewFD->isExternC())
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << LanguageLinkage;
if (CheckEquivalentExceptionSpec(
OldFD->getType()->getAs<FunctionProtoType>(), OldFD->getLocation(),
NewFD->getType()->getAs<FunctionProtoType>(), NewFD->getLocation()))
return true;
}
return false;
}
static bool CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD,
const FunctionDecl *NewFD,
bool CausesMV,
MultiVersionKind MVKind) {
if (!S.getASTContext().getTargetInfo().supportsMultiVersioning()) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_not_supported);
if (OldFD)
S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
return true;
}
bool IsCPUSpecificCPUDispatchMVKind =
MVKind == MultiVersionKind::CPUDispatch ||
MVKind == MultiVersionKind::CPUSpecific;
if (CausesMV && OldFD &&
checkNonMultiVersionCompatAttributes(S, OldFD, NewFD, MVKind))
return true;
if (checkNonMultiVersionCompatAttributes(S, NewFD, nullptr, MVKind))
return true;
// Only allow transition to MultiVersion if it hasn't been used.
if (OldFD && CausesMV && OldFD->isUsed(false))
return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used);
return S.areMultiversionVariantFunctionsCompatible(
OldFD, NewFD, S.PDiag(diag::err_multiversion_noproto),
PartialDiagnosticAt(NewFD->getLocation(),
S.PDiag(diag::note_multiversioning_caused_here)),
PartialDiagnosticAt(NewFD->getLocation(),
S.PDiag(diag::err_multiversion_doesnt_support)
<< static_cast<unsigned>(MVKind)),
PartialDiagnosticAt(NewFD->getLocation(),
S.PDiag(diag::err_multiversion_diff)),
/*TemplatesSupported=*/false,
/*ConstexprSupported=*/!IsCPUSpecificCPUDispatchMVKind,
/*CLinkageMayDiffer=*/false);
}
/// Check the validity of a multiversion function declaration that is the
/// first of its kind. Also marks the function itself as multiversioned.
///
/// This sets NewFD->isInvalidDecl() to true if there was an error.
///
/// Returns true if there was an error, false otherwise.
static bool CheckMultiVersionFirstFunction(Sema &S, FunctionDecl *FD) {
MultiVersionKind MVKind = FD->getMultiVersionKind();
assert(MVKind != MultiVersionKind::None &&
"Function lacks multiversion attribute");
const auto *TA = FD->getAttr<TargetAttr>();
const auto *TVA = FD->getAttr<TargetVersionAttr>();
// A target or target_version attribute only causes MV here if it names the
// default version; otherwise this is a normal function.
if ((TA && !TA->isDefaultVersion()) || (TVA && !TVA->isDefaultVersion()))
return false;
if ((TA || TVA) && CheckMultiVersionValue(S, FD)) {
FD->setInvalidDecl();
return true;
}
if (CheckMultiVersionAdditionalRules(S, nullptr, FD, true, MVKind)) {
FD->setInvalidDecl();
return true;
}
FD->setIsMultiVersion();
return false;
}
static bool PreviousDeclsHaveMultiVersionAttribute(const FunctionDecl *FD) {
for (const Decl *D = FD->getPreviousDecl(); D; D = D->getPreviousDecl()) {
if (D->getAsFunction()->getMultiVersionKind() != MultiVersionKind::None)
return true;
}
return false;
}
static bool CheckTargetCausesMultiVersioning(Sema &S, FunctionDecl *OldFD,
FunctionDecl *NewFD,
bool &Redeclaration,
NamedDecl *&OldDecl,
LookupResult &Previous) {
const auto *NewTA = NewFD->getAttr<TargetAttr>();
const auto *NewTVA = NewFD->getAttr<TargetVersionAttr>();
const auto *OldTA = OldFD->getAttr<TargetAttr>();
const auto *OldTVA = OldFD->getAttr<TargetVersionAttr>();
// If the old decl is NOT MultiVersioned yet, and we don't cause that
// to change, this is a simple redeclaration.
if ((NewTA && !NewTA->isDefaultVersion() &&
(!OldTA || OldTA->getFeaturesStr() == NewTA->getFeaturesStr())) ||
(NewTVA && !NewTVA->isDefaultVersion() &&
(!OldTVA || OldTVA->getName() == NewTVA->getName())))
return false;
// Otherwise, this decl causes MultiVersioning.
if (CheckMultiVersionAdditionalRules(S, OldFD, NewFD, true,
NewTVA ? MultiVersionKind::TargetVersion
: MultiVersionKind::Target)) {
NewFD->setInvalidDecl();
return true;
}
if (CheckMultiVersionValue(S, NewFD)) {
NewFD->setInvalidDecl();
return true;
}
// If this is 'default', permit the forward declaration.
if (!OldFD->isMultiVersion() &&
((NewTA && NewTA->isDefaultVersion() && !OldTA) ||
(NewTVA && NewTVA->isDefaultVersion() && !OldTVA))) {
Redeclaration = true;
OldDecl = OldFD;
OldFD->setIsMultiVersion();
NewFD->setIsMultiVersion();
return false;
}
if (CheckMultiVersionValue(S, OldFD)) {
S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
NewFD->setInvalidDecl();
return true;
}
if (NewTA) {
ParsedTargetAttr OldParsed =
S.getASTContext().getTargetInfo().parseTargetAttr(
OldTA->getFeaturesStr());
llvm::sort(OldParsed.Features);
ParsedTargetAttr NewParsed =
S.getASTContext().getTargetInfo().parseTargetAttr(
NewTA->getFeaturesStr());
// Sort order doesn't matter; it just needs to be consistent.
llvm::sort(NewParsed.Features);
if (OldParsed == NewParsed) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
return true;
}
}
if (NewTVA) {
llvm::SmallVector<StringRef, 8> Feats;
OldTVA->getFeatures(Feats);
llvm::sort(Feats);
llvm::SmallVector<StringRef, 8> NewFeats;
NewTVA->getFeatures(NewFeats);
llvm::sort(NewFeats);
if (Feats == NewFeats) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
return true;
}
}
for (const auto *FD : OldFD->redecls()) {
const auto *CurTA = FD->getAttr<TargetAttr>();
const auto *CurTVA = FD->getAttr<TargetVersionAttr>();
// We allow forward declarations before ANY multiversioning attributes, but
// nothing after the fact.
if (PreviousDeclsHaveMultiVersionAttribute(FD) &&
((NewTA && (!CurTA || CurTA->isInherited())) ||
(NewTVA && (!CurTVA || CurTVA->isInherited())))) {
S.Diag(FD->getLocation(), diag::err_multiversion_required_in_redecl)
<< (NewTA ? 0 : 2);
S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
NewFD->setInvalidDecl();
return true;
}
}
OldFD->setIsMultiVersion();
NewFD->setIsMultiVersion();
Redeclaration = false;
OldDecl = nullptr;
Previous.clear();
return false;
}
static bool MultiVersionTypesCompatible(MultiVersionKind Old,
MultiVersionKind New) {
if (Old == New || Old == MultiVersionKind::None ||
New == MultiVersionKind::None)
return true;
return (Old == MultiVersionKind::CPUDispatch &&
New == MultiVersionKind::CPUSpecific) ||
(Old == MultiVersionKind::CPUSpecific &&
New == MultiVersionKind::CPUDispatch);
}
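// Illustrative sketch (not from this file): the compatible mix allowed above
// is the usual cpu_specific/cpu_dispatch pairing, e.g.
//   __attribute__((cpu_specific(atom)))          void bar(void);
//   __attribute__((cpu_dispatch(atom, generic))) void bar(void);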
/// Check the validity of a new function declaration being added to an existing
/// multiversioned declaration collection.
static bool CheckMultiVersionAdditionalDecl(
Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD,
MultiVersionKind NewMVKind, const CPUDispatchAttr *NewCPUDisp,
const CPUSpecificAttr *NewCPUSpec, const TargetClonesAttr *NewClones,
bool &Redeclaration, NamedDecl *&OldDecl, LookupResult &Previous) {
const auto *NewTA = NewFD->getAttr<TargetAttr>();
const auto *NewTVA = NewFD->getAttr<TargetVersionAttr>();
MultiVersionKind OldMVKind = OldFD->getMultiVersionKind();
// Disallow mixing of multiversioning types.
if (!MultiVersionTypesCompatible(OldMVKind, NewMVKind)) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed);
S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
return true;
}
ParsedTargetAttr NewParsed;
if (NewTA) {
NewParsed = S.getASTContext().getTargetInfo().parseTargetAttr(
NewTA->getFeaturesStr());
llvm::sort(NewParsed.Features);
}
llvm::SmallVector<StringRef, 8> NewFeats;
if (NewTVA) {
NewTVA->getFeatures(NewFeats);
llvm::sort(NewFeats);
}
bool UseMemberUsingDeclRules =
S.CurContext->isRecord() && !NewFD->getFriendObjectKind();
bool MayNeedOverloadableChecks =
AllowOverloadingOfFunction(Previous, S.Context, NewFD);
// Next, check ALL non-invalid non-overloads to see if this is a redeclaration
// of a previous member of the MultiVersion set.
for (NamedDecl *ND : Previous) {
FunctionDecl *CurFD = ND->getAsFunction();
if (!CurFD || CurFD->isInvalidDecl())
continue;
if (MayNeedOverloadableChecks &&
S.IsOverload(NewFD, CurFD, UseMemberUsingDeclRules))
continue;
if (NewMVKind == MultiVersionKind::None &&
OldMVKind == MultiVersionKind::TargetVersion) {
NewFD->addAttr(TargetVersionAttr::CreateImplicit(
S.Context, "default", NewFD->getSourceRange(),
AttributeCommonInfo::AS_GNU));
NewFD->setIsMultiVersion();
NewMVKind = MultiVersionKind::TargetVersion;
if (!NewTVA) {
NewTVA = NewFD->getAttr<TargetVersionAttr>();
NewTVA->getFeatures(NewFeats);
llvm::sort(NewFeats);
}
}
switch (NewMVKind) {
case MultiVersionKind::None:
assert(OldMVKind == MultiVersionKind::TargetClones &&
"Only target_clones can be omitted in subsequent declarations");
break;
case MultiVersionKind::Target: {
const auto *CurTA = CurFD->getAttr<TargetAttr>();
if (CurTA->getFeaturesStr() == NewTA->getFeaturesStr()) {
NewFD->setIsMultiVersion();
Redeclaration = true;
OldDecl = ND;
return false;
}
ParsedTargetAttr CurParsed =
S.getASTContext().getTargetInfo().parseTargetAttr(
CurTA->getFeaturesStr());
llvm::sort(CurParsed.Features);
if (CurParsed == NewParsed) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
return true;
}
break;
}
case MultiVersionKind::TargetVersion: {
const auto *CurTVA = CurFD->getAttr<TargetVersionAttr>();
if (CurTVA->getName() == NewTVA->getName()) {
NewFD->setIsMultiVersion();
Redeclaration = true;
OldDecl = ND;
return false;
}
llvm::SmallVector<StringRef, 8> CurFeats;
if (CurTVA) {
CurTVA->getFeatures(CurFeats);
llvm::sort(CurFeats);
}
if (CurFeats == NewFeats) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
return true;
}
break;
}
case MultiVersionKind::TargetClones: {
const auto *CurClones = CurFD->getAttr<TargetClonesAttr>();
Redeclaration = true;
OldDecl = CurFD;
NewFD->setIsMultiVersion();
if (CurClones && NewClones &&
(CurClones->featuresStrs_size() != NewClones->featuresStrs_size() ||
!std::equal(CurClones->featuresStrs_begin(),
CurClones->featuresStrs_end(),
NewClones->featuresStrs_begin()))) {
S.Diag(NewFD->getLocation(), diag::err_target_clone_doesnt_match);
S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
return true;
}
return false;
}
case MultiVersionKind::CPUSpecific:
case MultiVersionKind::CPUDispatch: {
const auto *CurCPUSpec = CurFD->getAttr<CPUSpecificAttr>();
const auto *CurCPUDisp = CurFD->getAttr<CPUDispatchAttr>();
// Handle CPUDispatch/CPUSpecific versions.
// Only one CPUDispatch function is allowed; this will make it go through
// the redeclaration errors.
if (NewMVKind == MultiVersionKind::CPUDispatch &&
CurFD->hasAttr<CPUDispatchAttr>()) {
if (CurCPUDisp->cpus_size() == NewCPUDisp->cpus_size() &&
std::equal(
CurCPUDisp->cpus_begin(), CurCPUDisp->cpus_end(),
NewCPUDisp->cpus_begin(),
[](const IdentifierInfo *Cur, const IdentifierInfo *New) {
return Cur->getName() == New->getName();
})) {
NewFD->setIsMultiVersion();
Redeclaration = true;
OldDecl = ND;
return false;
}
// If the declarations don't match, this is an error condition.
S.Diag(NewFD->getLocation(), diag::err_cpu_dispatch_mismatch);
S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
return true;
}
if (NewMVKind == MultiVersionKind::CPUSpecific && CurCPUSpec) {
if (CurCPUSpec->cpus_size() == NewCPUSpec->cpus_size() &&
std::equal(
CurCPUSpec->cpus_begin(), CurCPUSpec->cpus_end(),
NewCPUSpec->cpus_begin(),
[](const IdentifierInfo *Cur, const IdentifierInfo *New) {
return Cur->getName() == New->getName();
})) {
NewFD->setIsMultiVersion();
Redeclaration = true;
OldDecl = ND;
return false;
}
// Only 1 version of CPUSpecific is allowed for each CPU.
for (const IdentifierInfo *CurII : CurCPUSpec->cpus()) {
for (const IdentifierInfo *NewII : NewCPUSpec->cpus()) {
if (CurII == NewII) {
S.Diag(NewFD->getLocation(), diag::err_cpu_specific_multiple_defs)
<< NewII;
S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
return true;
}
}
}
}
break;
}
}
}
// Else, this is simply a non-redecl case. Checking the 'value' is only
// necessary in the Target case, since the CPUSpecific/Dispatch cases are
// handled in the attribute-adding step.
if ((NewMVKind == MultiVersionKind::TargetVersion ||
NewMVKind == MultiVersionKind::Target) &&
CheckMultiVersionValue(S, NewFD)) {
NewFD->setInvalidDecl();
return true;
}
if (CheckMultiVersionAdditionalRules(S, OldFD, NewFD,
!OldFD->isMultiVersion(), NewMVKind)) {
NewFD->setInvalidDecl();
return true;
}
// Permit forward declarations in the case where these two are compatible.
if (!OldFD->isMultiVersion()) {
OldFD->setIsMultiVersion();
NewFD->setIsMultiVersion();
Redeclaration = true;
OldDecl = OldFD;
return false;
}
NewFD->setIsMultiVersion();
Redeclaration = false;
OldDecl = nullptr;
Previous.clear();
return false;
}
/// Check the validity of a multiversion function declaration.
/// Also marks the function itself as multiversioned.
///
/// This sets NewFD->isInvalidDecl() to true if there was an error.
///
/// Returns true if there was an error, false otherwise.
static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
bool &Redeclaration, NamedDecl *&OldDecl,
LookupResult &Previous) {
const auto *NewTA = NewFD->getAttr<TargetAttr>();
const auto *NewTVA = NewFD->getAttr<TargetVersionAttr>();
const auto *NewCPUDisp = NewFD->getAttr<CPUDispatchAttr>();
const auto *NewCPUSpec = NewFD->getAttr<CPUSpecificAttr>();
const auto *NewClones = NewFD->getAttr<TargetClonesAttr>();
MultiVersionKind MVKind = NewFD->getMultiVersionKind();
// Main isn't allowed to become a multiversion function; however, it IS
// permitted for 'main' to be marked with the 'target' optimization hint.
// For 'target_version', only the default version is allowed.
if (NewFD->isMain()) {
if (MVKind != MultiVersionKind::None &&
!(MVKind == MultiVersionKind::Target && !NewTA->isDefaultVersion()) &&
!(MVKind == MultiVersionKind::TargetVersion &&
NewTVA->isDefaultVersion())) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main);
NewFD->setInvalidDecl();
return true;
}
return false;
}
if (!OldDecl || !OldDecl->getAsFunction() ||
OldDecl->getDeclContext()->getRedeclContext() !=
NewFD->getDeclContext()->getRedeclContext()) {
// If there's no previous declaration, AND this isn't attempting to cause
// multiversioning, this isn't an error condition.
if (MVKind == MultiVersionKind::None)
return false;
return CheckMultiVersionFirstFunction(S, NewFD);
}
FunctionDecl *OldFD = OldDecl->getAsFunction();
if (!OldFD->isMultiVersion() && MVKind == MultiVersionKind::None) {
// No target_version attribute means the default version.
if (!NewTVA) {
const auto *OldTVA = OldFD->getAttr<TargetVersionAttr>();
if (OldTVA) {
NewFD->addAttr(TargetVersionAttr::CreateImplicit(
S.Context, "default", NewFD->getSourceRange(),
AttributeCommonInfo::AS_GNU));
NewFD->setIsMultiVersion();
OldFD->setIsMultiVersion();
OldDecl = OldFD;
Redeclaration = true;
return true;
}
}
return false;
}
// Multiversioned redeclarations aren't allowed to omit the attribute, except
// for target_clones and target_version.
if (OldFD->isMultiVersion() && MVKind == MultiVersionKind::None &&
OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones &&
OldFD->getMultiVersionKind() != MultiVersionKind::TargetVersion) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl)
<< (OldFD->getMultiVersionKind() != MultiVersionKind::Target);
NewFD->setInvalidDecl();
return true;
}
if (!OldFD->isMultiVersion()) {
switch (MVKind) {
case MultiVersionKind::Target:
case MultiVersionKind::TargetVersion:
return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, Redeclaration,
OldDecl, Previous);
case MultiVersionKind::TargetClones:
if (OldFD->isUsed(false)) {
NewFD->setInvalidDecl();
return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used);
}
OldFD->setIsMultiVersion();
break;
case MultiVersionKind::CPUDispatch:
case MultiVersionKind::CPUSpecific:
case MultiVersionKind::None:
break;
}
}
// At this point, we have a multiversion function decl (in OldFD) AND an
// appropriate attribute in the current function decl. Resolve that these are
// still compatible with previous declarations.
return CheckMultiVersionAdditionalDecl(S, OldFD, NewFD, MVKind, NewCPUDisp,
NewCPUSpec, NewClones, Redeclaration,
OldDecl, Previous);
}
/// Perform semantic checking of a new function declaration.
///
/// Performs semantic analysis of the new function declaration
/// NewFD. This routine performs all semantic checking that does not
/// require the actual declarator involved in the declaration, and is
/// used both for the declaration of functions as they are parsed
/// (called via ActOnDeclarator) and for the declaration of functions
/// that have been instantiated via C++ template instantiation (called
/// via InstantiateDecl).
///
/// \param IsMemberSpecialization whether this new function declaration is
/// a member specialization (that replaces any definition provided by the
/// previous declaration).
///
/// This sets NewFD->isInvalidDecl() to true if there was an error.
///
/// \returns true if the function declaration is a redeclaration.
bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
LookupResult &Previous,
bool IsMemberSpecialization,
bool DeclIsDefn) {
assert(!NewFD->getReturnType()->isVariablyModifiedType() &&
"Variably modified return types are not handled here");
// Determine whether the type of this function should be merged with
// a previous visible declaration. This never happens for functions in C++,
// and always happens in C if the previous declaration was visible.
bool MergeTypeWithPrevious = !getLangOpts().CPlusPlus &&
!Previous.isShadowed();
bool Redeclaration = false;
NamedDecl *OldDecl = nullptr;
bool MayNeedOverloadableChecks = false;
// Merge or overload the declaration with an existing declaration of
// the same name, if appropriate.
if (!Previous.empty()) {
// Determine whether NewFD is an overload of PrevDecl or
// a declaration that requires merging. If it's an overload,
// there's no more work to do here; we'll just add the new
// function to the scope.
if (!AllowOverloadingOfFunction(Previous, Context, NewFD)) {
NamedDecl *Candidate = Previous.getRepresentativeDecl();
if (shouldLinkPossiblyHiddenDecl(Candidate, NewFD)) {
Redeclaration = true;
OldDecl = Candidate;
}
} else {
MayNeedOverloadableChecks = true;
switch (CheckOverload(S, NewFD, Previous, OldDecl,
/*NewIsUsingDecl*/ false)) {
case Ovl_Match:
Redeclaration = true;
break;
case Ovl_NonFunction:
Redeclaration = true;
break;
case Ovl_Overload:
Redeclaration = false;
break;
}
}
}
// Check for a previous extern "C" declaration with this name.
if (!Redeclaration &&
checkForConflictWithNonVisibleExternC(*this, NewFD, Previous)) {
if (!Previous.empty()) {
// This is an extern "C" declaration with the same name as a previous
// declaration, and thus redeclares that entity...
Redeclaration = true;
OldDecl = Previous.getFoundDecl();
MergeTypeWithPrevious = false;
// ... except in the presence of __attribute__((overloadable)).
if (OldDecl->hasAttr<OverloadableAttr>() ||
NewFD->hasAttr<OverloadableAttr>()) {
if (IsOverload(NewFD, cast<FunctionDecl>(OldDecl), false)) {
MayNeedOverloadableChecks = true;
Redeclaration = false;
OldDecl = nullptr;
}
}
}
}
if (CheckMultiVersionFunction(*this, NewFD, Redeclaration, OldDecl, Previous))
return Redeclaration;
// PPC MMA non-pointer types are not allowed as function return types.
if (Context.getTargetInfo().getTriple().isPPC64() &&
CheckPPCMMAType(NewFD->getReturnType(), NewFD->getLocation())) {
NewFD->setInvalidDecl();
}
// C++11 [dcl.constexpr]p8:
// A constexpr specifier for a non-static member function that is not
// a constructor declares that member function to be const.
//
// This needs to be delayed until we know whether this is an out-of-line
// definition of a static member function.
//
// This rule is not present in C++1y, so we produce a backwards
// compatibility warning whenever it happens in C++11.
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(NewFD);
if (!getLangOpts().CPlusPlus14 && MD && MD->isConstexpr() &&
!MD->isStatic() && !isa<CXXConstructorDecl>(MD) &&
!isa<CXXDestructorDecl>(MD) && !MD->getMethodQualifiers().hasConst()) {
CXXMethodDecl *OldMD = nullptr;
if (OldDecl)
OldMD = dyn_cast_or_null<CXXMethodDecl>(OldDecl->getAsFunction());
if (!OldMD || !OldMD->isStatic()) {
const FunctionProtoType *FPT =
MD->getType()->castAs<FunctionProtoType>();
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.TypeQuals.addConst();
MD->setType(Context.getFunctionType(FPT->getReturnType(),
FPT->getParamTypes(), EPI));
// Warn that we did this, if we're not performing template instantiation.
// In that case, we'll have warned already when the template was defined.
if (!inTemplateInstantiation()) {
SourceLocation AddConstLoc;
if (FunctionTypeLoc FTL = MD->getTypeSourceInfo()->getTypeLoc()
.IgnoreParens().getAs<FunctionTypeLoc>())
AddConstLoc = getLocForEndOfToken(FTL.getRParenLoc());
Diag(MD->getLocation(), diag::warn_cxx14_compat_constexpr_not_const)
<< FixItHint::CreateInsertion(AddConstLoc, " const");
}
}
}
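// Illustrative sketch (hypothetical type, not from this file): under C++11 the
// rule applied above turns
//   struct S { constexpr int get(); };
// into the equivalent of 'constexpr int get() const;', and the fix-it warns
// that C++14 dropped this implicit const.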
if (Redeclaration) {
// NewFD and OldDecl represent declarations that need to be
// merged.
if (MergeFunctionDecl(NewFD, OldDecl, S, MergeTypeWithPrevious,
DeclIsDefn)) {
NewFD->setInvalidDecl();
return Redeclaration;
}
Previous.clear();
Previous.addDecl(OldDecl);
if (FunctionTemplateDecl *OldTemplateDecl =
dyn_cast<FunctionTemplateDecl>(OldDecl)) {
auto *OldFD = OldTemplateDecl->getTemplatedDecl();
FunctionTemplateDecl *NewTemplateDecl
= NewFD->getDescribedFunctionTemplate();
assert(NewTemplateDecl && "Template/non-template mismatch");
// The call to MergeFunctionDecl above may have created some state in
// NewTemplateDecl that needs to be merged with OldTemplateDecl before we
// can add it as a redeclaration.
NewTemplateDecl->mergePrevDecl(OldTemplateDecl);
NewFD->setPreviousDeclaration(OldFD);
if (NewFD->isCXXClassMember()) {
NewFD->setAccess(OldTemplateDecl->getAccess());
NewTemplateDecl->setAccess(OldTemplateDecl->getAccess());
}
// If this is an explicit specialization of a member that is a function
// template, mark it as a member specialization.
if (IsMemberSpecialization &&
NewTemplateDecl->getInstantiatedFromMemberTemplate()) {
NewTemplateDecl->setMemberSpecialization();
assert(OldTemplateDecl->isMemberSpecialization());
// Explicit specializations of a member template do not inherit deleted
// status from the parent member template that they are specializing.
if (OldFD->isDeleted()) {
// FIXME: This assert will not hold in the presence of modules.
assert(OldFD->getCanonicalDecl() == OldFD);
// FIXME: We need an update record for this AST mutation.
OldFD->setDeletedAsWritten(false);
}
}
} else {
if (shouldLinkDependentDeclWithPrevious(NewFD, OldDecl)) {
auto *OldFD = cast<FunctionDecl>(OldDecl);
// This needs to happen first so that 'inline' propagates.
NewFD->setPreviousDeclaration(OldFD);
if (NewFD->isCXXClassMember())
NewFD->setAccess(OldFD->getAccess());
}
}
} else if (!getLangOpts().CPlusPlus && MayNeedOverloadableChecks &&
!NewFD->getAttr<OverloadableAttr>()) {
assert((Previous.empty() ||
llvm::any_of(Previous,
[](const NamedDecl *ND) {
return ND->hasAttr<OverloadableAttr>();
})) &&
"Non-redecls shouldn't happen without overloadable present");
auto OtherUnmarkedIter = llvm::find_if(Previous, [](const NamedDecl *ND) {
const auto *FD = dyn_cast<FunctionDecl>(ND);
return FD && !FD->hasAttr<OverloadableAttr>();
});
if (OtherUnmarkedIter != Previous.end()) {
Diag(NewFD->getLocation(),
diag::err_attribute_overloadable_multiple_unmarked_overloads);
Diag((*OtherUnmarkedIter)->getLocation(),
diag::note_attribute_overloadable_prev_overload)
<< false;
NewFD->addAttr(OverloadableAttr::CreateImplicit(Context));
}
}
if (LangOpts.OpenMP)
ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(NewFD);
// Semantic checking for this function declaration (in isolation).
if (getLangOpts().CPlusPlus) {
// C++-specific checks.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(NewFD)) {
CheckConstructor(Constructor);
} else if (CXXDestructorDecl *Destructor =
dyn_cast<CXXDestructorDecl>(NewFD)) {
// We check here for invalid destructor names.
// If we have a friend destructor declaration that is dependent, we can't
// diagnose right away because cases like this are still valid:
// template <class T> struct A { friend T::X::~Y(); };
// struct B { struct Y { ~Y(); }; using X = Y; };
// template struct A<B>;
if (NewFD->getFriendObjectKind() == Decl::FriendObjectKind::FOK_None ||
!Destructor->getThisType()->isDependentType()) {
CXXRecordDecl *Record = Destructor->getParent();
QualType ClassType = Context.getTypeDeclType(Record);
DeclarationName Name = Context.DeclarationNames.getCXXDestructorName(
Context.getCanonicalType(ClassType));
if (NewFD->getDeclName() != Name) {
Diag(NewFD->getLocation(), diag::err_destructor_name);
NewFD->setInvalidDecl();
return Redeclaration;
}
}
} else if (auto *Guide = dyn_cast<CXXDeductionGuideDecl>(NewFD)) {
if (auto *TD = Guide->getDescribedFunctionTemplate())
CheckDeductionGuideTemplate(TD);
// A deduction guide is not on the list of entities that can be
// explicitly specialized.
if (Guide->getTemplateSpecializationKind() == TSK_ExplicitSpecialization)
Diag(Guide->getBeginLoc(), diag::err_deduction_guide_specialized)
<< /*explicit specialization*/ 1;
}
// Find any virtual functions that this function overrides.
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(NewFD)) {
if (!Method->isFunctionTemplateSpecialization() &&
!Method->getDescribedFunctionTemplate() &&
Method->isCanonicalDecl()) {
AddOverriddenMethods(Method->getParent(), Method);
}
if (Method->isVirtual() && NewFD->getTrailingRequiresClause())
// C++2a [class.virtual]p6
// A virtual method shall not have a requires-clause.
Diag(NewFD->getTrailingRequiresClause()->getBeginLoc(),
diag::err_constrained_virtual_method);
if (Method->isStatic())
checkThisInStaticMemberFunctionType(Method);
}
// C++20: dcl.decl.general p4:
// The optional requires-clause ([temp.pre]) in an init-declarator or
// member-declarator shall be present only if the declarator declares a
// templated function ([dcl.fct]).
if (Expr *TRC = NewFD->getTrailingRequiresClause()) {
if (!NewFD->isTemplated() && !NewFD->isTemplateInstantiation())
Diag(TRC->getBeginLoc(), diag::err_constrained_non_templated_function);
}
if (CXXConversionDecl *Conversion = dyn_cast<CXXConversionDecl>(NewFD))
ActOnConversionDeclarator(Conversion);
// Extra checking for C++ overloaded operators (C++ [over.oper]).
if (NewFD->isOverloadedOperator() &&
CheckOverloadedOperatorDeclaration(NewFD)) {
NewFD->setInvalidDecl();
return Redeclaration;
}
// Extra checking for C++0x literal operators (C++0x [over.literal]).
if (NewFD->getLiteralIdentifier() &&
CheckLiteralOperatorDeclaration(NewFD)) {
NewFD->setInvalidDecl();
return Redeclaration;
}
// In C++, check default arguments now that we have merged decls, unless the
// lexical context is the class, in which case this is done during delayed
// parsing anyway.
if (!CurContext->isRecord())
CheckCXXDefaultArguments(NewFD);
// If this function is declared as being extern "C", then check to see if
// the function returns a UDT (class, struct, or union type) that is not C
// compatible, and if it does, warn the user.
// But, issue any diagnostic on the first declaration only.
if (Previous.empty() && NewFD->isExternC()) {
QualType R = NewFD->getReturnType();
if (R->isIncompleteType() && !R->isVoidType())
Diag(NewFD->getLocation(), diag::warn_return_value_udt_incomplete)
<< NewFD << R;
else if (!R.isPODType(Context) && !R->isVoidType() &&
!R->isObjCObjectPointerType())
Diag(NewFD->getLocation(), diag::warn_return_value_udt) << NewFD << R;
}
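// Illustrative sketch (hypothetical declaration, not from this file): the
// extern "C" return-type warnings above cover cases like
//   extern "C" std::string make_name();  // non-POD UDT returned with C linkage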
// C++1z [dcl.fct]p6:
// [...] whether the function has a non-throwing exception-specification
// [is] part of the function type
//
// This results in an ABI break between C++14 and C++17 for functions whose
// declared type includes an exception-specification in a parameter or
// return type. (Exception specifications on the function itself are OK in
// most cases, and exception specifications are not permitted in most other
// contexts where they could make it into a mangling.)
if (!getLangOpts().CPlusPlus17 && !NewFD->getPrimaryTemplate()) {
auto HasNoexcept = [&](QualType T) -> bool {
// Strip off declarator chunks that could be between us and a function
// type. We don't need to look far, exception specifications are very
// restricted prior to C++17.
if (auto *RT = T->getAs<ReferenceType>())
T = RT->getPointeeType();
else if (T->isAnyPointerType())
T = T->getPointeeType();
else if (auto *MPT = T->getAs<MemberPointerType>())
T = MPT->getPointeeType();
if (auto *FPT = T->getAs<FunctionProtoType>())
if (FPT->isNothrow())
return true;
return false;
};
auto *FPT = NewFD->getType()->castAs<FunctionProtoType>();
bool AnyNoexcept = HasNoexcept(FPT->getReturnType());
for (QualType T : FPT->param_types())
AnyNoexcept |= HasNoexcept(T);
if (AnyNoexcept)
Diag(NewFD->getLocation(),
diag::warn_cxx17_compat_exception_spec_in_signature)
<< NewFD;
}
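// Illustrative sketch (hypothetical declaration, not from this file): a
// signature such as
//   void f(void (*callback)() noexcept);
// mangles differently in C++17, where the exception specification is part of
// the function type, which is exactly what the compat warning above flags.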
if (!Redeclaration && LangOpts.CUDA)
checkCUDATargetOverload(NewFD, Previous);
}
return Redeclaration;
}
void Sema::CheckMain(FunctionDecl* FD, const DeclSpec& DS) {
// C++11 [basic.start.main]p3:
// A program that [...] declares main to be inline, static or
// constexpr is ill-formed.
// C11 6.7.4p4: In a hosted environment, no function specifier(s) shall
// appear in a declaration of main.
// static main is not an error under C99, but we should warn about it.
// We accept _Noreturn main as an extension.
if (FD->getStorageClass() == SC_Static)
Diag(DS.getStorageClassSpecLoc(), getLangOpts().CPlusPlus
? diag::err_static_main : diag::warn_static_main)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
if (FD->isInlineSpecified())
Diag(DS.getInlineSpecLoc(), diag::err_inline_main)
<< FixItHint::CreateRemoval(DS.getInlineSpecLoc());
if (DS.isNoreturnSpecified()) {
SourceLocation NoreturnLoc = DS.getNoreturnSpecLoc();
SourceRange NoreturnRange(NoreturnLoc, getLocForEndOfToken(NoreturnLoc));
Diag(NoreturnLoc, diag::ext_noreturn_main);
Diag(NoreturnLoc, diag::note_main_remove_noreturn)
<< FixItHint::CreateRemoval(NoreturnRange);
}
if (FD->isConstexpr()) {
Diag(DS.getConstexprSpecLoc(), diag::err_constexpr_main)
<< FD->isConsteval()
<< FixItHint::CreateRemoval(DS.getConstexprSpecLoc());
FD->setConstexprKind(ConstexprSpecKind::Unspecified);
}
if (getLangOpts().OpenCL) {
Diag(FD->getLocation(), diag::err_opencl_no_main)
<< FD->hasAttr<OpenCLKernelAttr>();
FD->setInvalidDecl();
return;
}
// Functions named main in HLSL are default entry points, but they don't have
// specific signatures they are required to conform to.
if (getLangOpts().HLSL)
return;
QualType T = FD->getType();
assert(T->isFunctionType() && "function decl is not of function type");
const FunctionType* FT = T->castAs<FunctionType>();
// Set default calling convention for main()
if (FT->getCallConv() != CC_C) {
FT = Context.adjustFunctionType(FT, FT->getExtInfo().withCallingConv(CC_C));
FD->setType(QualType(FT, 0));
T = Context.getCanonicalType(FD->getType());
}
if (getLangOpts().GNUMode && !getLangOpts().CPlusPlus) {
// In C with GNU extensions we allow main() to have non-integer return
// type, but we should warn about the extension, and we disable the
// implicit-return-zero rule.
// GCC in C mode accepts qualified 'int'.
if (Context.hasSameUnqualifiedType(FT->getReturnType(), Context.IntTy))
FD->setHasImplicitReturnZero(true);
else {
Diag(FD->getTypeSpecStartLoc(), diag::ext_main_returns_nonint);
SourceRange RTRange = FD->getReturnTypeSourceRange();
if (RTRange.isValid())
Diag(RTRange.getBegin(), diag::note_main_change_return_type)
<< FixItHint::CreateReplacement(RTRange, "int");
}
} else {
// In C and C++, main magically returns 0 if you fall off the end;
// set the flag which tells us that.
// This is C++ [basic.start.main]p5 and C99 5.1.2.2.3.
// All the standards say that main() should return 'int'.
if (Context.hasSameType(FT->getReturnType(), Context.IntTy))
FD->setHasImplicitReturnZero(true);
else {
// Otherwise, this is just a flat-out error.
SourceRange RTRange = FD->getReturnTypeSourceRange();
Diag(FD->getTypeSpecStartLoc(), diag::err_main_returns_nonint)
<< (RTRange.isValid() ? FixItHint::CreateReplacement(RTRange, "int")
: FixItHint());
FD->setInvalidDecl(true);
}
}
// Treat protoless main() as nullary.
if (isa<FunctionNoProtoType>(FT)) return;
const FunctionProtoType* FTP = cast<const FunctionProtoType>(FT);
unsigned nparams = FTP->getNumParams();
assert(FD->getNumParams() == nparams);
bool HasExtraParameters = (nparams > 3);
if (FTP->isVariadic()) {
Diag(FD->getLocation(), diag::ext_variadic_main);
// FIXME: if we had information about the location of the ellipsis, we
// could add a FixIt hint to remove it as a parameter.
}
// Darwin passes an undocumented fourth argument of type char**. If
// other platforms start sprouting these, the logic below will start
// getting shifty.
if (nparams == 4 && Context.getTargetInfo().getTriple().isOSDarwin())
HasExtraParameters = false;
if (HasExtraParameters) {
Diag(FD->getLocation(), diag::err_main_surplus_args) << nparams;
FD->setInvalidDecl(true);
nparams = 3;
}
// FIXME: a lot of the following diagnostics would be improved
// if we had some location information about types.
QualType CharPP =
Context.getPointerType(Context.getPointerType(Context.CharTy));
QualType Expected[] = { Context.IntTy, CharPP, CharPP, CharPP };
for (unsigned i = 0; i < nparams; ++i) {
QualType AT = FTP->getParamType(i);
bool mismatch = true;
if (Context.hasSameUnqualifiedType(AT, Expected[i]))
mismatch = false;
else if (Expected[i] == CharPP) {
// As an extension, the following forms are okay:
// char const **
// char const * const *
// char * const *
QualifierCollector qs;
const PointerType* PT;
if ((PT = qs.strip(AT)->getAs<PointerType>()) &&
(PT = qs.strip(PT->getPointeeType())->getAs<PointerType>()) &&
Context.hasSameType(QualType(qs.strip(PT->getPointeeType()), 0),
Context.CharTy)) {
qs.removeConst();
mismatch = !qs.empty();
}
}
if (mismatch) {
Diag(FD->getLocation(), diag::err_main_arg_wrong) << i << Expected[i];
// TODO: suggest replacing given type with expected type
FD->setInvalidDecl(true);
}
}
if (nparams == 1 && !FD->isInvalidDecl()) {
Diag(FD->getLocation(), diag::warn_main_one_arg);
}
if (!FD->isInvalidDecl() && FD->getDescribedFunctionTemplate()) {
Diag(FD->getLocation(), diag::err_mainlike_template_decl) << FD;
FD->setInvalidDecl();
}
}
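// For illustration, a sketch of declaration forms the checks above are meant
// to accept or reject (not an exhaustive list of the diagnostics emitted):
//
//   static int main() {}              // err_static_main (C++) / warn_static_main (C)
//   inline int main() {}              // err_inline_main
//   constexpr int main() {}           // err_constexpr_main
//   void main() {}                    // err_main_returns_nonint (extension in GNU C)
//   int main(int, char **, char **, char **, char **) {}
//                                     // err_main_surplus_args (more than three,
//                                     //   except the Darwin four-argument form)
//   int main(int argc, const char *const *argv) {} // OK: qualified char** is allowed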
static bool isDefaultStdCall(FunctionDecl *FD, Sema &S) {
// Default calling convention for main and wmain is __cdecl
if (FD->getName() == "main" || FD->getName() == "wmain")
return false;
// Default calling convention for MinGW is __cdecl
const llvm::Triple &T = S.Context.getTargetInfo().getTriple();
if (T.isWindowsGNUEnvironment())
return false;
// Default calling convention for WinMain, wWinMain and DllMain
// is __stdcall on 32 bit Windows
if (T.isOSWindows() && T.getArch() == llvm::Triple::x86)
return true;
return false;
}
void Sema::CheckMSVCRTEntryPoint(FunctionDecl *FD) {
QualType T = FD->getType();
assert(T->isFunctionType() && "function decl is not of function type");
const FunctionType *FT = T->castAs<FunctionType>();
// Set an implicit return of 'zero' if the function can return some integral,
// enumeration, pointer or nullptr type.
if (FT->getReturnType()->isIntegralOrEnumerationType() ||
FT->getReturnType()->isAnyPointerType() ||
FT->getReturnType()->isNullPtrType())
// DllMain is exempt because a return value of zero means it failed.
if (FD->getName() != "DllMain")
FD->setHasImplicitReturnZero(true);
// Explicitly specified calling conventions are applied to MSVC entry points
if (!hasExplicitCallingConv(T)) {
if (isDefaultStdCall(FD, *this)) {
if (FT->getCallConv() != CC_X86StdCall) {
FT = Context.adjustFunctionType(
FT, FT->getExtInfo().withCallingConv(CC_X86StdCall));
FD->setType(QualType(FT, 0));
}
} else if (FT->getCallConv() != CC_C) {
FT = Context.adjustFunctionType(FT,
FT->getExtInfo().withCallingConv(CC_C));
FD->setType(QualType(FT, 0));
}
}
if (!FD->isInvalidDecl() && FD->getDescribedFunctionTemplate()) {
Diag(FD->getLocation(), diag::err_mainlike_template_decl) << FD;
FD->setInvalidDecl();
}
}
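// A sketch of the MSVC entry points this adjusts, assuming a 32-bit Windows
// target (the names below are the usual CRT entry points, nothing new):
//
//   int WinMain(...);   // no explicit convention written -> adjusted to __stdcall
//   int wWinMain(...);  // likewise
//   int DllMain(...);   // __stdcall, but no implicit 'return 0' (zero means failure)
//   int main(...);      // main/wmain keep the default __cdecl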
void Sema::CheckHLSLEntryPoint(FunctionDecl *FD) {
auto &TargetInfo = getASTContext().getTargetInfo();
auto const Triple = TargetInfo.getTriple();
switch (Triple.getEnvironment()) {
default:
// FIXME: check all shader profiles.
break;
case llvm::Triple::EnvironmentType::Compute:
if (!FD->hasAttr<HLSLNumThreadsAttr>()) {
Diag(FD->getLocation(), diag::err_hlsl_missing_numthreads)
<< Triple.getEnvironmentName();
FD->setInvalidDecl();
}
break;
}
for (const auto *Param : FD->parameters()) {
if (!Param->hasAttr<HLSLAnnotationAttr>()) {
// FIXME: Handle struct parameters where annotations are on struct fields.
// See: https://github.com/llvm/llvm-project/issues/57875
Diag(FD->getLocation(), diag::err_hlsl_missing_semantic_annotation);
Diag(Param->getLocation(), diag::note_previous_decl) << Param;
FD->setInvalidDecl();
}
}
// FIXME: Verify return type semantic annotation.
}
bool Sema::CheckForConstantInitializer(Expr *Init, QualType DclT) {
// FIXME: Need strict checking. In C89, we need to check for
// any assignment, increment, decrement, function-calls, or
// commas outside of a sizeof. In C99, it's the same list,
// except that the aforementioned are allowed in unevaluated
// expressions. Everything else falls under the
// "may accept other forms of constant expressions" exception.
//
// Regular C++ code will not end up here (exceptions: language extensions,
// OpenCL C++ etc), so the constant expression rules there don't matter.
if (Init->isValueDependent()) {
assert(Init->containsErrors() &&
"Dependent code should only occur in error-recovery path.");
return true;
}
const Expr *Culprit;
if (Init->isConstantInitializer(Context, false, &Culprit))
return false;
Diag(Culprit->getExprLoc(), diag::err_init_element_not_constant)
<< Culprit->getSourceRange();
return true;
}
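// For illustration, the kind of C initializer this check rejects or accepts
// (examples only; the exact C89/C99 rules are noted in the FIXME above):
//
//   int f(void);
//   int g = f();           // err_init_element_not_constant at file scope
//   static int h = 5 + 3;  // OK: arithmetic constant expression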
namespace {
// Visits an initialization expression to see if OrigDecl is evaluated in
// its own initialization and emits a warning if it is.
class SelfReferenceChecker
: public EvaluatedExprVisitor<SelfReferenceChecker> {
Sema &S;
Decl *OrigDecl;
bool isRecordType;
bool isPODType;
bool isReferenceType;
bool isInitList;
llvm::SmallVector<unsigned, 4> InitFieldIndex;
public:
typedef EvaluatedExprVisitor<SelfReferenceChecker> Inherited;
SelfReferenceChecker(Sema &S, Decl *OrigDecl) : Inherited(S.Context),
S(S), OrigDecl(OrigDecl) {
isPODType = false;
isRecordType = false;
isReferenceType = false;
isInitList = false;
if (ValueDecl *VD = dyn_cast<ValueDecl>(OrigDecl)) {
isPODType = VD->getType().isPODType(S.Context);
isRecordType = VD->getType()->isRecordType();
isReferenceType = VD->getType()->isReferenceType();
}
}
// For most expressions, just call the visitor. For initializer lists,
// track the index of the field being initialized since fields are
// initialized in order allowing use of previously initialized fields.
void CheckExpr(Expr *E) {
InitListExpr *InitList = dyn_cast<InitListExpr>(E);
if (!InitList) {
Visit(E);
return;
}
// Track and increment the index here.
isInitList = true;
InitFieldIndex.push_back(0);
for (auto *Child : InitList->children()) {
CheckExpr(cast<Expr>(Child));
++InitFieldIndex.back();
}
InitFieldIndex.pop_back();
}
// Returns true if MemberExpr is checked and no further checking is needed.
// Returns false if additional checking is required.
bool CheckInitListMemberExpr(MemberExpr *E, bool CheckReference) {
llvm::SmallVector<FieldDecl*, 4> Fields;
Expr *Base = E;
bool ReferenceField = false;
// Get the field members used.
while (MemberExpr *ME = dyn_cast<MemberExpr>(Base)) {
FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl());
if (!FD)
return false;
Fields.push_back(FD);
if (FD->getType()->isReferenceType())
ReferenceField = true;
Base = ME->getBase()->IgnoreParenImpCasts();
}
// Keep checking only if the base Decl is the same.
DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base);
if (!DRE || DRE->getDecl() != OrigDecl)
return false;
// A reference field can be bound to an uninitialized field.
if (CheckReference && !ReferenceField)
return true;
// Convert FieldDecls to their index number.
llvm::SmallVector<unsigned, 4> UsedFieldIndex;
for (const FieldDecl *I : llvm::reverse(Fields))
UsedFieldIndex.push_back(I->getFieldIndex());
// See if a warning is needed by checking the first difference in index
// numbers. If the field being used has a lower index than the field being
// initialized, then the use is safe.
for (auto UsedIter = UsedFieldIndex.begin(),
UsedEnd = UsedFieldIndex.end(),
OrigIter = InitFieldIndex.begin(),
OrigEnd = InitFieldIndex.end();
UsedIter != UsedEnd && OrigIter != OrigEnd; ++UsedIter, ++OrigIter) {
if (*UsedIter < *OrigIter)
return true;
if (*UsedIter > *OrigIter)
break;
}
// TODO: Add a different warning which will print the field names.
HandleDeclRefExpr(DRE);
return true;
}
// For most expressions, the cast is directly above the DeclRefExpr.
// For conditional operators, the cast can be outside the conditional
// operator if both expressions are DeclRefExpr's.
void HandleValue(Expr *E) {
E = E->IgnoreParens();
if (DeclRefExpr* DRE = dyn_cast<DeclRefExpr>(E)) {
HandleDeclRefExpr(DRE);
return;
}
if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) {
Visit(CO->getCond());
HandleValue(CO->getTrueExpr());
HandleValue(CO->getFalseExpr());
return;
}
if (BinaryConditionalOperator *BCO =
dyn_cast<BinaryConditionalOperator>(E)) {
Visit(BCO->getCond());
HandleValue(BCO->getFalseExpr());
return;
}
if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) {
HandleValue(OVE->getSourceExpr());
return;
}
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
if (BO->getOpcode() == BO_Comma) {
Visit(BO->getLHS());
HandleValue(BO->getRHS());
return;
}
}
if (isa<MemberExpr>(E)) {
if (isInitList) {
if (CheckInitListMemberExpr(cast<MemberExpr>(E),
false /*CheckReference*/))
return;
}
Expr *Base = E->IgnoreParenImpCasts();
while (MemberExpr *ME = dyn_cast<MemberExpr>(Base)) {
// Check for static member variables and don't warn on them.
if (!isa<FieldDecl>(ME->getMemberDecl()))
return;
Base = ME->getBase()->IgnoreParenImpCasts();
}
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base))
HandleDeclRefExpr(DRE);
return;
}
Visit(E);
}
// Reference types not handled in HandleValue are handled here since all
// uses of references are bad, not just r-value uses.
void VisitDeclRefExpr(DeclRefExpr *E) {
if (isReferenceType)
HandleDeclRefExpr(E);
}
void VisitImplicitCastExpr(ImplicitCastExpr *E) {
if (E->getCastKind() == CK_LValueToRValue) {
HandleValue(E->getSubExpr());
return;
}
Inherited::VisitImplicitCastExpr(E);
}
void VisitMemberExpr(MemberExpr *E) {
if (isInitList) {
if (CheckInitListMemberExpr(E, true /*CheckReference*/))
return;
}
// Don't warn on arrays since they can be treated as pointers.
if (E->getType()->canDecayToPointerType()) return;
// Warn when a non-static method call is followed by non-static member
// field accesses, which are followed by a DeclRefExpr.
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(E->getMemberDecl());
bool Warn = (MD && !MD->isStatic());
Expr *Base = E->getBase()->IgnoreParenImpCasts();
while (MemberExpr *ME = dyn_cast<MemberExpr>(Base)) {
if (!isa<FieldDecl>(ME->getMemberDecl()))
Warn = false;
Base = ME->getBase()->IgnoreParenImpCasts();
}
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base)) {
if (Warn)
HandleDeclRefExpr(DRE);
return;
}
// The base of a MemberExpr is not a MemberExpr or a DeclRefExpr.
// Visit that expression.
Visit(Base);
}
void VisitCXXOperatorCallExpr(CXXOperatorCallExpr *E) {
Expr *Callee = E->getCallee();
if (isa<UnresolvedLookupExpr>(Callee))
return Inherited::VisitCXXOperatorCallExpr(E);
Visit(Callee);
for (auto Arg: E->arguments())
HandleValue(Arg->IgnoreParenImpCasts());
}
void VisitUnaryOperator(UnaryOperator *E) {
// For a POD record type, taking the address of its own members is well-defined.
if (E->getOpcode() == UO_AddrOf && isRecordType &&
isa<MemberExpr>(E->getSubExpr()->IgnoreParens())) {
if (!isPODType)
HandleValue(E->getSubExpr());
return;
}
if (E->isIncrementDecrementOp()) {
HandleValue(E->getSubExpr());
return;
}
Inherited::VisitUnaryOperator(E);
}
void VisitObjCMessageExpr(ObjCMessageExpr *E) {}
void VisitCXXConstructExpr(CXXConstructExpr *E) {
if (E->getConstructor()->isCopyConstructor()) {
Expr *ArgExpr = E->getArg(0);
if (InitListExpr *ILE = dyn_cast<InitListExpr>(ArgExpr))
if (ILE->getNumInits() == 1)
ArgExpr = ILE->getInit(0);
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(ArgExpr))
if (ICE->getCastKind() == CK_NoOp)
ArgExpr = ICE->getSubExpr();
HandleValue(ArgExpr);
return;
}
Inherited::VisitCXXConstructExpr(E);
}
void VisitCallExpr(CallExpr *E) {
// Treat std::move as a use.
if (E->isCallToStdMove()) {
HandleValue(E->getArg(0));
return;
}
Inherited::VisitCallExpr(E);
}
void VisitBinaryOperator(BinaryOperator *E) {
if (E->isCompoundAssignmentOp()) {
HandleValue(E->getLHS());
Visit(E->getRHS());
return;
}
Inherited::VisitBinaryOperator(E);
}
// A custom visitor for BinaryConditionalOperator is needed because the
// regular visitor would check the condition and true expression separately
// but both point to the same place giving duplicate diagnostics.
void VisitBinaryConditionalOperator(BinaryConditionalOperator *E) {
Visit(E->getCond());
Visit(E->getFalseExpr());
}
void HandleDeclRefExpr(DeclRefExpr *DRE) {
Decl* ReferenceDecl = DRE->getDecl();
if (OrigDecl != ReferenceDecl) return;
unsigned diag;
if (isReferenceType) {
diag = diag::warn_uninit_self_reference_in_reference_init;
} else if (cast<VarDecl>(OrigDecl)->isStaticLocal()) {
diag = diag::warn_static_self_reference_in_init;
} else if (isa<TranslationUnitDecl>(OrigDecl->getDeclContext()) ||
isa<NamespaceDecl>(OrigDecl->getDeclContext()) ||
DRE->getDecl()->getType()->isRecordType()) {
diag = diag::warn_uninit_self_reference_in_init;
} else {
// Local variables will be handled by the CFG analysis.
return;
}
S.DiagRuntimeBehavior(DRE->getBeginLoc(), DRE,
S.PDiag(diag)
<< DRE->getDecl() << OrigDecl->getLocation()
<< DRE->getSourceRange());
}
};
/// CheckSelfReference - Warns if OrigDecl is used in expression E.
static void CheckSelfReference(Sema &S, Decl* OrigDecl, Expr *E,
bool DirectInit) {
// Parameters are occasionally constructed from themselves, for instance
// in recursive functions. Skip them.
if (isa<ParmVarDecl>(OrigDecl))
return;
E = E->IgnoreParens();
// Skip checking T a = a where T is not a record or reference type.
// Doing so is a way to silence uninitialized warnings.
if (!DirectInit && !cast<VarDecl>(OrigDecl)->getType()->isRecordType())
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E))
if (ICE->getCastKind() == CK_LValueToRValue)
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr()))
if (DRE->getDecl() == OrigDecl)
return;
SelfReferenceChecker(S, OrigDecl).CheckExpr(E);
}
} // end anonymous namespace
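// A sketch of the self-initialization forms the checker above is meant to
// catch (illustrative; plain local scalars are left to the CFG analysis):
//
//   int x = x;                 // local scalar: deferred to the CFG-based -Wuninitialized
//   static int y = y + 1;      // static local: warn_static_self_reference_in_init
//   int &r = r;                // warn_uninit_self_reference_in_reference_init
//   struct P { int a, b; };
//   P p = { p.b, 0 };          // warn_uninit_self_reference_in_init: p.b not yet initialized
//   P q = { 1, q.a };          // OK: q.a is initialized before q.b is used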
namespace {
// Simple wrapper to add the name of a variable or (if no variable is
// available) a DeclarationName into a diagnostic.
struct VarDeclOrName {
VarDecl *VDecl;
DeclarationName Name;
friend const Sema::SemaDiagnosticBuilder &
operator<<(const Sema::SemaDiagnosticBuilder &Diag, VarDeclOrName VN) {
return VN.VDecl ? Diag << VN.VDecl : Diag << VN.Name;
}
};
} // end anonymous namespace
QualType Sema::deduceVarTypeFromInitializer(VarDecl *VDecl,
DeclarationName Name, QualType Type,
TypeSourceInfo *TSI,
SourceRange Range, bool DirectInit,
Expr *Init) {
bool IsInitCapture = !VDecl;
assert((!VDecl || !VDecl->isInitCapture()) &&
"init captures are expected to be deduced prior to initialization");
VarDeclOrName VN{VDecl, Name};
DeducedType *Deduced = Type->getContainedDeducedType();
assert(Deduced && "deduceVarTypeFromInitializer for non-deduced type");
// C++11 [dcl.spec.auto]p3
if (!Init) {
assert(VDecl && "no init for init capture deduction?");
// Except for class argument deduction, and then for an initializing
// declaration only, i.e. no static at class scope or extern.
if (!isa<DeducedTemplateSpecializationType>(Deduced) ||
VDecl->hasExternalStorage() ||
VDecl->isStaticDataMember()) {
Diag(VDecl->getLocation(), diag::err_auto_var_requires_init)
<< VDecl->getDeclName() << Type;
return QualType();
}
}
ArrayRef<Expr*> DeduceInits;
if (Init)
DeduceInits = Init;
if (DirectInit) {
if (auto *PL = dyn_cast_or_null<ParenListExpr>(Init))
DeduceInits = PL->exprs();
}
if (isa<DeducedTemplateSpecializationType>(Deduced)) {
assert(VDecl && "non-auto type for init capture deduction?");
InitializedEntity Entity = InitializedEntity::InitializeVariable(VDecl);
InitializationKind Kind = InitializationKind::CreateForInit(
VDecl->getLocation(), DirectInit, Init);
// FIXME: Initialization should not be taking a mutable list of inits.
SmallVector<Expr*, 8> InitsCopy(DeduceInits.begin(), DeduceInits.end());
return DeduceTemplateSpecializationFromInitializer(TSI, Entity, Kind,
InitsCopy);
}
if (DirectInit) {
if (auto *IL = dyn_cast<InitListExpr>(Init))
DeduceInits = IL->inits();
}
// Deduction only works if we have exactly one source expression.
if (DeduceInits.empty()) {
// It isn't possible to write this directly, but it is possible to
// end up in this situation with "auto x(some_pack...);"
Diag(Init->getBeginLoc(), IsInitCapture
? diag::err_init_capture_no_expression
: diag::err_auto_var_init_no_expression)
<< VN << Type << Range;
return QualType();
}
if (DeduceInits.size() > 1) {
Diag(DeduceInits[1]->getBeginLoc(),
IsInitCapture ? diag::err_init_capture_multiple_expressions
: diag::err_auto_var_init_multiple_expressions)
<< VN << Type << Range;
return QualType();
}
Expr *DeduceInit = DeduceInits[0];
if (DirectInit && isa<InitListExpr>(DeduceInit)) {
Diag(Init->getBeginLoc(), IsInitCapture
? diag::err_init_capture_paren_braces
: diag::err_auto_var_init_paren_braces)
<< isa<InitListExpr>(Init) << VN << Type << Range;
return QualType();
}
// Expressions default to 'id' when we're in a debugger.
bool DefaultedAnyToId = false;
if (getLangOpts().DebuggerCastResultToId &&
Init->getType() == Context.UnknownAnyTy && !IsInitCapture) {
ExprResult Result = forceUnknownAnyToType(Init, Context.getObjCIdType());
if (Result.isInvalid()) {
return QualType();
}
Init = Result.get();
DefaultedAnyToId = true;
}
// C++ [dcl.decomp]p1:
// If the assignment-expression [...] has array type A and no ref-qualifier
// is present, e has type cv A
if (VDecl && isa<DecompositionDecl>(VDecl) &&
Context.hasSameUnqualifiedType(Type, Context.getAutoDeductType()) &&
DeduceInit->getType()->isConstantArrayType())
return Context.getQualifiedType(DeduceInit->getType(),
Type.getQualifiers());
QualType DeducedType;
TemplateDeductionInfo Info(DeduceInit->getExprLoc());
TemplateDeductionResult Result =
DeduceAutoType(TSI->getTypeLoc(), DeduceInit, DeducedType, Info);
if (Result != TDK_Success && Result != TDK_AlreadyDiagnosed) {
if (!IsInitCapture)
DiagnoseAutoDeductionFailure(VDecl, DeduceInit);
else if (isa<InitListExpr>(Init))
Diag(Range.getBegin(),
diag::err_init_capture_deduction_failure_from_init_list)
<< VN
<< (DeduceInit->getType().isNull() ? TSI->getType()
: DeduceInit->getType())
<< DeduceInit->getSourceRange();
else
Diag(Range.getBegin(), diag::err_init_capture_deduction_failure)
<< VN << TSI->getType()
<< (DeduceInit->getType().isNull() ? TSI->getType()
: DeduceInit->getType())
<< DeduceInit->getSourceRange();
}
// Warn if we deduced 'id'. 'auto' usually implies type-safety, but using
// 'id' instead of a specific object type prevents most of our usual
// checks.
// We only want to warn outside of template instantiations, though:
// inside a template, the 'id' could have come from a parameter.
if (!inTemplateInstantiation() && !DefaultedAnyToId && !IsInitCapture &&
!DeducedType.isNull() && DeducedType->isObjCIdType()) {
SourceLocation Loc = TSI->getTypeLoc().getBeginLoc();
Diag(Loc, diag::warn_auto_var_is_id) << VN << Range;
}
return DeducedType;
}
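// Illustrative deduction outcomes for the paths above (a sketch, not a
// complete enumeration):
//
//   auto a;                  // err_auto_var_requires_init
//   auto b(1, 2);            // err_auto_var_init_multiple_expressions
//   auto c({ 1 });           // err_auto_var_init_paren_braces
//   auto d = { 1, 2 };       // OK: deduces std::initializer_list<int>
//   int arr[3] = { 1, 2, 3 };
//   auto [x, y, z] = arr;    // per [dcl.decomp]p1, the entity has type int[3] (copied)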
bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit,
Expr *Init) {
assert(!Init || !Init->containsErrors());
QualType DeducedType = deduceVarTypeFromInitializer(
VDecl, VDecl->getDeclName(), VDecl->getType(), VDecl->getTypeSourceInfo(),
VDecl->getSourceRange(), DirectInit, Init);
if (DeducedType.isNull()) {
VDecl->setInvalidDecl();
return true;
}
VDecl->setType(DeducedType);
assert(VDecl->isLinkageValid());
// In ARC, infer lifetime.
if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(VDecl))
VDecl->setInvalidDecl();
if (getLangOpts().OpenCL)
deduceOpenCLAddressSpace(VDecl);
// If this is a redeclaration, check that the type we just deduced matches
// the previously declared type.
if (VarDecl *Old = VDecl->getPreviousDecl()) {
// We never need to merge the type, because we cannot form an incomplete
// array of auto, nor deduce such a type.
MergeVarDeclTypes(VDecl, Old, /*MergeTypeWithPrevious*/ false);
}
// Check the deduced type is valid for a variable declaration.
CheckVariableDeclarationType(VDecl);
return VDecl->isInvalidDecl();
}
void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init,
SourceLocation Loc) {
if (auto *EWC = dyn_cast<ExprWithCleanups>(Init))
Init = EWC->getSubExpr();
if (auto *CE = dyn_cast<ConstantExpr>(Init))
Init = CE->getSubExpr();
QualType InitType = Init->getType();
assert((InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
InitType.hasNonTrivialToPrimitiveCopyCUnion()) &&
"shouldn't be called if type doesn't have a non-trivial C struct");
if (auto *ILE = dyn_cast<InitListExpr>(Init)) {
for (auto *I : ILE->inits()) {
if (!I->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion() &&
!I->getType().hasNonTrivialToPrimitiveCopyCUnion())
continue;
SourceLocation SL = I->getExprLoc();
checkNonTrivialCUnionInInitializer(I, SL.isValid() ? SL : Loc);
}
return;
}
if (isa<ImplicitValueInitExpr>(Init)) {
if (InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion())
checkNonTrivialCUnion(InitType, Loc, NTCUC_DefaultInitializedObject,
NTCUK_Init);
} else {
// Assume all other explicit initializers involve copying some existing
// object.
// TODO: ignore any explicit initializers where we can guarantee
// copy-elision.
if (InitType.hasNonTrivialToPrimitiveCopyCUnion())
checkNonTrivialCUnion(InitType, Loc, NTCUC_CopyInit, NTCUK_Copy);
}
}
namespace {
bool shouldIgnoreForRecordTriviality(const FieldDecl *FD) {
// Ignore unavailable fields. A field can be marked as unavailable explicitly
// in the source code or implicitly by the compiler if it is in a union
// defined in a system header and has non-trivial ObjC ownership
// qualifications. We don't want those fields to participate in determining
// whether the containing union is non-trivial.
return FD->hasAttr<UnavailableAttr>();
}
struct DiagNonTrivalCUnionDefaultInitializeVisitor
: DefaultInitializedTypeVisitor<DiagNonTrivalCUnionDefaultInitializeVisitor,
void> {
using Super =
DefaultInitializedTypeVisitor<DiagNonTrivalCUnionDefaultInitializeVisitor,
void>;
DiagNonTrivalCUnionDefaultInitializeVisitor(
QualType OrigTy, SourceLocation OrigLoc,
Sema::NonTrivialCUnionContext UseContext, Sema &S)
: OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {}
void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType QT,
const FieldDecl *FD, bool InNonTrivialUnion) {
if (const auto *AT = S.Context.getAsArrayType(QT))
return this->asDerived().visit(S.Context.getBaseElementType(AT), FD,
InNonTrivialUnion);
return Super::visitWithKind(PDIK, QT, FD, InNonTrivialUnion);
}
void visitARCStrong(QualType QT, const FieldDecl *FD,
bool InNonTrivialUnion) {
if (InNonTrivialUnion)
S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
<< 1 << 0 << QT << FD->getName();
}
void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
if (InNonTrivialUnion)
S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
<< 1 << 0 << QT << FD->getName();
}
void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
if (RD->isUnion()) {
if (OrigLoc.isValid()) {
bool IsUnion = false;
if (auto *OrigRD = OrigTy->getAsRecordDecl())
IsUnion = OrigRD->isUnion();
S.Diag(OrigLoc, diag::err_non_trivial_c_union_in_invalid_context)
<< 0 << OrigTy << IsUnion << UseContext;
// Reset OrigLoc so that this diagnostic is emitted only once.
OrigLoc = SourceLocation();
}
InNonTrivialUnion = true;
}
if (InNonTrivialUnion)
S.Diag(RD->getLocation(), diag::note_non_trivial_c_union)
<< 0 << 0 << QT.getUnqualifiedType() << "";
for (const FieldDecl *FD : RD->fields())
if (!shouldIgnoreForRecordTriviality(FD))
asDerived().visit(FD->getType(), FD, InNonTrivialUnion);
}
void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {}
// The non-trivial C union type or the struct/union type that contains a
// non-trivial C union.
QualType OrigTy;
SourceLocation OrigLoc;
Sema::NonTrivialCUnionContext UseContext;
Sema &S;
};
struct DiagNonTrivalCUnionDestructedTypeVisitor
: DestructedTypeVisitor<DiagNonTrivalCUnionDestructedTypeVisitor, void> {
using Super =
DestructedTypeVisitor<DiagNonTrivalCUnionDestructedTypeVisitor, void>;
DiagNonTrivalCUnionDestructedTypeVisitor(
QualType OrigTy, SourceLocation OrigLoc,
Sema::NonTrivialCUnionContext UseContext, Sema &S)
: OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {}
void visitWithKind(QualType::DestructionKind DK, QualType QT,
const FieldDecl *FD, bool InNonTrivialUnion) {
if (const auto *AT = S.Context.getAsArrayType(QT))
return this->asDerived().visit(S.Context.getBaseElementType(AT), FD,
InNonTrivialUnion);
return Super::visitWithKind(DK, QT, FD, InNonTrivialUnion);
}
void visitARCStrong(QualType QT, const FieldDecl *FD,
bool InNonTrivialUnion) {
if (InNonTrivialUnion)
S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
<< 1 << 1 << QT << FD->getName();
}
void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
if (InNonTrivialUnion)
S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
<< 1 << 1 << QT << FD->getName();
}
void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
if (RD->isUnion()) {
if (OrigLoc.isValid()) {
bool IsUnion = false;
if (auto *OrigRD = OrigTy->getAsRecordDecl())
IsUnion = OrigRD->isUnion();
S.Diag(OrigLoc, diag::err_non_trivial_c_union_in_invalid_context)
<< 1 << OrigTy << IsUnion << UseContext;
// Reset OrigLoc so that this diagnostic is emitted only once.
OrigLoc = SourceLocation();
}
InNonTrivialUnion = true;
}
if (InNonTrivialUnion)
S.Diag(RD->getLocation(), diag::note_non_trivial_c_union)
<< 0 << 1 << QT.getUnqualifiedType() << "";
for (const FieldDecl *FD : RD->fields())
if (!shouldIgnoreForRecordTriviality(FD))
asDerived().visit(FD->getType(), FD, InNonTrivialUnion);
}
void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {}
void visitCXXDestructor(QualType QT, const FieldDecl *FD,
bool InNonTrivialUnion) {}
// The non-trivial C union type or the struct/union type that contains a
// non-trivial C union.
QualType OrigTy;
SourceLocation OrigLoc;
Sema::NonTrivialCUnionContext UseContext;
Sema &S;
};
struct DiagNonTrivalCUnionCopyVisitor
: CopiedTypeVisitor<DiagNonTrivalCUnionCopyVisitor, false, void> {
using Super = CopiedTypeVisitor<DiagNonTrivalCUnionCopyVisitor, false, void>;
DiagNonTrivalCUnionCopyVisitor(QualType OrigTy, SourceLocation OrigLoc,
Sema::NonTrivialCUnionContext UseContext,
Sema &S)
: OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {}
void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType QT,
const FieldDecl *FD, bool InNonTrivialUnion) {
if (const auto *AT = S.Context.getAsArrayType(QT))
return this->asDerived().visit(S.Context.getBaseElementType(AT), FD,
InNonTrivialUnion);
return Super::visitWithKind(PCK, QT, FD, InNonTrivialUnion);
}
void visitARCStrong(QualType QT, const FieldDecl *FD,
bool InNonTrivialUnion) {
if (InNonTrivialUnion)
S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
<< 1 << 2 << QT << FD->getName();
}
void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
if (InNonTrivialUnion)
S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
<< 1 << 2 << QT << FD->getName();
}
void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
if (RD->isUnion()) {
if (OrigLoc.isValid()) {
bool IsUnion = false;
if (auto *OrigRD = OrigTy->getAsRecordDecl())
IsUnion = OrigRD->isUnion();
S.Diag(OrigLoc, diag::err_non_trivial_c_union_in_invalid_context)
<< 2 << OrigTy << IsUnion << UseContext;
// Reset OrigLoc so that this diagnostic is emitted only once.
OrigLoc = SourceLocation();
}
InNonTrivialUnion = true;
}
if (InNonTrivialUnion)
S.Diag(RD->getLocation(), diag::note_non_trivial_c_union)
<< 0 << 2 << QT.getUnqualifiedType() << "";
for (const FieldDecl *FD : RD->fields())
if (!shouldIgnoreForRecordTriviality(FD))
asDerived().visit(FD->getType(), FD, InNonTrivialUnion);
}
void preVisit(QualType::PrimitiveCopyKind PCK, QualType QT,
const FieldDecl *FD, bool InNonTrivialUnion) {}
void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {}
void visitVolatileTrivial(QualType QT, const FieldDecl *FD,
bool InNonTrivialUnion) {}
// The non-trivial C union type or the struct/union type that contains a
// non-trivial C union.
QualType OrigTy;
SourceLocation OrigLoc;
Sema::NonTrivialCUnionContext UseContext;
Sema &S;
};
} // namespace
void Sema::checkNonTrivialCUnion(QualType QT, SourceLocation Loc,
NonTrivialCUnionContext UseContext,
unsigned NonTrivialKind) {
assert((QT.hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
QT.hasNonTrivialToPrimitiveDestructCUnion() ||
QT.hasNonTrivialToPrimitiveCopyCUnion()) &&
"shouldn't be called if type doesn't have a non-trivial C union");
if ((NonTrivialKind & NTCUK_Init) &&
QT.hasNonTrivialToPrimitiveDefaultInitializeCUnion())
DiagNonTrivalCUnionDefaultInitializeVisitor(QT, Loc, UseContext, *this)
.visit(QT, nullptr, false);
if ((NonTrivialKind & NTCUK_Destruct) &&
QT.hasNonTrivialToPrimitiveDestructCUnion())
DiagNonTrivalCUnionDestructedTypeVisitor(QT, Loc, UseContext, *this)
.visit(QT, nullptr, false);
if ((NonTrivialKind & NTCUK_Copy) && QT.hasNonTrivialToPrimitiveCopyCUnion())
DiagNonTrivalCUnionCopyVisitor(QT, Loc, UseContext, *this)
.visit(QT, nullptr, false);
}
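// Sketch of the Objective-C ARC pattern these visitors report
// (err_non_trivial_c_union_in_invalid_context plus notes; illustrative only):
//
//   union U { __strong id obj; int i; };  // the __strong member makes U non-trivial
//   union U u;                            // default-initializing U is diagnosed
//   union U v = u;                        // copying U is diagnosed as well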
/// AddInitializerToDecl - Adds the initializer Init to the
/// declaration dcl. If DirectInit is true, this is C++ direct
/// initialization rather than copy initialization.
void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
// If there is no declaration, there was an error parsing it. Just ignore
// the initializer.
if (!RealDecl || RealDecl->isInvalidDecl()) {
CorrectDelayedTyposInExpr(Init, dyn_cast_or_null<VarDecl>(RealDecl));
return;
}
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(RealDecl)) {
// Pure-specifiers are handled in ActOnPureSpecifier.
Diag(Method->getLocation(), diag::err_member_function_initialization)
<< Method->getDeclName() << Init->getSourceRange();
Method->setInvalidDecl();
return;
}
VarDecl *VDecl = dyn_cast<VarDecl>(RealDecl);
if (!VDecl) {
assert(!isa<FieldDecl>(RealDecl) && "field init shouldn't get here");
Diag(RealDecl->getLocation(), diag::err_illegal_initializer);
RealDecl->setInvalidDecl();
return;
}
// C++11 [decl.spec.auto]p6. Deduce the type which 'auto' stands in for.
if (VDecl->getType()->isUndeducedType()) {
// Attempt typo correction early so that the type of the init expression can
// be deduced based on the chosen correction if the original init contains a
// TypoExpr.
ExprResult Res = CorrectDelayedTyposInExpr(Init, VDecl);
if (!Res.isUsable()) {
// There are unresolved typos in Init, just drop them.
// FIXME: improve the recovery strategy to preserve the Init.
RealDecl->setInvalidDecl();
return;
}
if (Res.get()->containsErrors()) {
// Invalidate the decl as we don't know the type for recovery-expr yet.
RealDecl->setInvalidDecl();
VDecl->setInit(Res.get());
return;
}
Init = Res.get();
if (DeduceVariableDeclarationType(VDecl, DirectInit, Init))
return;
}
// dllimport cannot be used on variable definitions.
if (VDecl->hasAttr<DLLImportAttr>() && !VDecl->isStaticDataMember()) {
Diag(VDecl->getLocation(), diag::err_attribute_dllimport_data_definition);
VDecl->setInvalidDecl();
return;
}
// C99 6.7.8p5. If the declaration of an identifier has block scope, and
// the identifier has external or internal linkage, the declaration shall
// have no initializer for the identifier.
// C++14 [dcl.init]p5 is the same restriction for C++.
if (VDecl->isLocalVarDecl() && VDecl->hasExternalStorage()) {
Diag(VDecl->getLocation(), diag::err_block_extern_cant_init);
VDecl->setInvalidDecl();
return;
}
if (!VDecl->getType()->isDependentType()) {
// A definition must end up with a complete type, which means it must be
// complete with the restriction that an array type might be completed by
// the initializer; note that later code assumes this restriction.
QualType BaseDeclType = VDecl->getType();
if (const ArrayType *Array = Context.getAsIncompleteArrayType(BaseDeclType))
BaseDeclType = Array->getElementType();
if (RequireCompleteType(VDecl->getLocation(), BaseDeclType,
diag::err_typecheck_decl_incomplete_type)) {
RealDecl->setInvalidDecl();
return;
}
// The variable can not have an abstract class type.
if (RequireNonAbstractType(VDecl->getLocation(), VDecl->getType(),
diag::err_abstract_type_in_decl,
AbstractVariableType))
VDecl->setInvalidDecl();
}
// C++ [module.import/6] external definitions are not permitted in header
// units.
if (getLangOpts().CPlusPlusModules && currentModuleIsHeaderUnit() &&
!VDecl->isInvalidDecl() && VDecl->isThisDeclarationADefinition() &&
VDecl->getFormalLinkage() == Linkage::ExternalLinkage &&
!VDecl->isInline() && !VDecl->isTemplated() &&
!isa<VarTemplateSpecializationDecl>(VDecl)) {
Diag(VDecl->getLocation(), diag::err_extern_def_in_header_unit);
VDecl->setInvalidDecl();
}
// If adding the initializer will turn this declaration into a definition,
// and we already have a definition for this variable, diagnose or otherwise
// handle the situation.
if (VarDecl *Def = VDecl->getDefinition())
if (Def != VDecl &&
(!VDecl->isStaticDataMember() || VDecl->isOutOfLine()) &&
!VDecl->isThisDeclarationADemotedDefinition() &&
checkVarDeclRedefinition(Def, VDecl))
return;
if (getLangOpts().CPlusPlus) {
// C++ [class.static.data]p4
// If a static data member is of const integral or const
// enumeration type, its declaration in the class definition can
// specify a constant-initializer which shall be an integral
// constant expression (5.19). In that case, the member can appear
// in integral constant expressions. The member shall still be
// defined in a namespace scope if it is used in the program and the
// namespace scope definition shall not contain an initializer.
//
// We already performed a redefinition check above, but for static
// data members we also need to check whether there was an in-class
// declaration with an initializer.
if (VDecl->isStaticDataMember() && VDecl->getCanonicalDecl()->hasInit()) {
Diag(Init->getExprLoc(), diag::err_static_data_member_reinitialization)
<< VDecl->getDeclName();
Diag(VDecl->getCanonicalDecl()->getInit()->getExprLoc(),
diag::note_previous_initializer)
<< 0;
return;
}
if (VDecl->hasLocalStorage())
setFunctionHasBranchProtectedScope();
if (DiagnoseUnexpandedParameterPack(Init, UPPC_Initializer)) {
VDecl->setInvalidDecl();
return;
}
}
// OpenCL 1.1 6.5.2: "Variables allocated in the __local address space inside
// a kernel function cannot be initialized."
if (VDecl->getType().getAddressSpace() == LangAS::opencl_local) {
Diag(VDecl->getLocation(), diag::err_local_cant_init);
VDecl->setInvalidDecl();
return;
}
// The LoaderUninitialized attribute acts as a definition (of undef).
if (VDecl->hasAttr<LoaderUninitializedAttr>()) {
Diag(VDecl->getLocation(), diag::err_loader_uninitialized_cant_init);
VDecl->setInvalidDecl();
return;
}
// Get the decl's type and save a reference for later, since
// CheckInitializerTypes may change it.
QualType DclT = VDecl->getType(), SavT = DclT;
// Expressions default to 'id' when we're in a debugger
// and we are assigning it to a variable of Objective-C pointer type.
if (getLangOpts().DebuggerCastResultToId && DclT->isObjCObjectPointerType() &&
Init->getType() == Context.UnknownAnyTy) {
ExprResult Result = forceUnknownAnyToType(Init, Context.getObjCIdType());
if (Result.isInvalid()) {
VDecl->setInvalidDecl();
return;
}
Init = Result.get();
}
// Perform the initialization.
ParenListExpr *CXXDirectInit = dyn_cast<ParenListExpr>(Init);
bool IsParenListInit = false;
if (!VDecl->isInvalidDecl()) {
InitializedEntity Entity = InitializedEntity::InitializeVariable(VDecl);
InitializationKind Kind = InitializationKind::CreateForInit(
VDecl->getLocation(), DirectInit, Init);
MultiExprArg Args = Init;
if (CXXDirectInit)
Args = MultiExprArg(CXXDirectInit->getExprs(),
CXXDirectInit->getNumExprs());
// Try to correct any TypoExprs in the initialization arguments.
for (size_t Idx = 0; Idx < Args.size(); ++Idx) {
ExprResult Res = CorrectDelayedTyposInExpr(
Args[Idx], VDecl, /*RecoverUncorrectedTypos=*/true,
[this, Entity, Kind](Expr *E) {
InitializationSequence Init(*this, Entity, Kind, MultiExprArg(E));
return Init.Failed() ? ExprError() : E;
});
if (Res.isInvalid()) {
VDecl->setInvalidDecl();
} else if (Res.get() != Args[Idx]) {
Args[Idx] = Res.get();
}
}
if (VDecl->isInvalidDecl())
return;
InitializationSequence InitSeq(*this, Entity, Kind, Args,
/*TopLevelOfInitList=*/false,
/*TreatUnavailableAsInvalid=*/false);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Args, &DclT);
if (Result.isInvalid()) {
// If the provided initializer fails to initialize the var decl,
// we attach a recovery expr for better recovery.
auto RecoveryExpr =
CreateRecoveryExpr(Init->getBeginLoc(), Init->getEndLoc(), Args);
if (RecoveryExpr.get())
VDecl->setInit(RecoveryExpr.get());
return;
}
Init = Result.getAs<Expr>();
IsParenListInit = !InitSeq.steps().empty() &&
InitSeq.step_begin()->Kind ==
InitializationSequence::SK_ParenthesizedListInit;
}
// Check for self-references within variable initializers.
// Variables declared within a function/method body (except for references)
// are handled by a dataflow analysis.
// This is undefined behavior in C++, but valid in C.
if (getLangOpts().CPlusPlus)
if (!VDecl->hasLocalStorage() || VDecl->getType()->isRecordType() ||
VDecl->getType()->isReferenceType())
CheckSelfReference(*this, RealDecl, Init, DirectInit);
// If the type changed, it means we had an incomplete type that was
// completed by the initializer. For example:
// int ary[] = { 1, 3, 5 };
// "ary" transitions from an IncompleteArrayType to a ConstantArrayType.
if (!VDecl->isInvalidDecl() && (DclT != SavT))
VDecl->setType(DclT);
if (!VDecl->isInvalidDecl()) {
checkUnsafeAssigns(VDecl->getLocation(), VDecl->getType(), Init);
if (VDecl->hasAttr<BlocksAttr>())
checkRetainCycles(VDecl, Init);
// It is safe to assign a weak reference into a strong variable.
// Although this code can still have problems:
// id x = self.weakProp;
// id y = self.weakProp;
// we do not warn, to avoid warning spuriously when 'x' and 'y' are on separate
// paths through the function. This should be revisited if
// -Wrepeated-use-of-weak is made flow-sensitive.
if (FunctionScopeInfo *FSI = getCurFunction())
if ((VDecl->getType().getObjCLifetime() == Qualifiers::OCL_Strong ||
VDecl->getType().isNonWeakInMRRWithObjCWeak(Context)) &&
!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak,
Init->getBeginLoc()))
FSI->markSafeWeakUse(Init);
}
// The initialization is usually a full-expression.
//
// FIXME: If this is a braced initialization of an aggregate, it is not
// an expression, and each individual field initializer is a separate
// full-expression. For instance, in:
//
// struct Temp { ~Temp(); };
// struct S { S(Temp); };
// struct T { S a, b; } t = { Temp(), Temp() }
//
// we should destroy the first Temp before constructing the second.
ExprResult Result =
ActOnFinishFullExpr(Init, VDecl->getLocation(),
/*DiscardedValue*/ false, VDecl->isConstexpr());
if (Result.isInvalid()) {
VDecl->setInvalidDecl();
return;
}
Init = Result.get();
// Attach the initializer to the decl.
VDecl->setInit(Init);
if (VDecl->isLocalVarDecl()) {
// Don't check the initializer if the declaration is malformed.
if (VDecl->isInvalidDecl()) {
// do nothing
// OpenCL v1.2 s6.5.3: __constant locals must be constant-initialized.
// This is true even in C++ for OpenCL.
} else if (VDecl->getType().getAddressSpace() == LangAS::opencl_constant) {
CheckForConstantInitializer(Init, DclT);
// Otherwise, C++ does not restrict the initializer.
} else if (getLangOpts().CPlusPlus) {
// do nothing
// C99 6.7.8p4: All the expressions in an initializer for an object that has
// static storage duration shall be constant expressions or string literals.
} else if (VDecl->getStorageClass() == SC_Static) {
CheckForConstantInitializer(Init, DclT);
// C89 is stricter than C99 for aggregate initializers.
// C89 6.5.7p3: All the expressions [...] in an initializer list
// for an object that has aggregate or union type shall be
// constant expressions.
} else if (!getLangOpts().C99 && VDecl->getType()->isAggregateType() &&
isa<InitListExpr>(Init)) {
const Expr *Culprit;
if (!Init->isConstantInitializer(Context, false, &Culprit)) {
Diag(Culprit->getExprLoc(),
diag::ext_aggregate_init_not_constant)
<< Culprit->getSourceRange();
}
}
if (auto *E = dyn_cast<ExprWithCleanups>(Init))
if (auto *BE = dyn_cast<BlockExpr>(E->getSubExpr()->IgnoreParens()))
if (VDecl->hasLocalStorage())
BE->getBlockDecl()->setCanAvoidCopyToHeap();
} else if (VDecl->isStaticDataMember() && !VDecl->isInline() &&
VDecl->getLexicalDeclContext()->isRecord()) {
// This is an in-class initialization for a static data member, e.g.,
//
// struct S {
// static const int value = 17;
// };
// C++ [class.mem]p4:
// A member-declarator can contain a constant-initializer only
// if it declares a static member (9.4) of const integral or
// const enumeration type, see 9.4.2.
//
// C++11 [class.static.data]p3:
// If a non-volatile non-inline const static data member is of integral
// or enumeration type, its declaration in the class definition can
// specify a brace-or-equal-initializer in which every initializer-clause
// that is an assignment-expression is a constant expression. A static
// data member of literal type can be declared in the class definition
// with the constexpr specifier; if so, its declaration shall specify a
// brace-or-equal-initializer in which every initializer-clause that is
// an assignment-expression is a constant expression.
// Do nothing on dependent types.
if (DclT->isDependentType()) {
// Allow any 'static constexpr' members, whether or not they are of literal
// type. We separately check that every constexpr variable is of literal
// type.
} else if (VDecl->isConstexpr()) {
// Require constness.
} else if (!DclT.isConstQualified()) {
Diag(VDecl->getLocation(), diag::err_in_class_initializer_non_const)
<< Init->getSourceRange();
VDecl->setInvalidDecl();
// We allow integer constant expressions in all cases.
} else if (DclT->isIntegralOrEnumerationType()) {
// Check whether the expression is a constant expression.
SourceLocation Loc;
if (getLangOpts().CPlusPlus11 && DclT.isVolatileQualified())
// In C++11, a non-constexpr const static data member with an
// in-class initializer cannot be volatile.
Diag(VDecl->getLocation(), diag::err_in_class_initializer_volatile);
else if (Init->isValueDependent())
; // Nothing to check.
else if (Init->isIntegerConstantExpr(Context, &Loc))
; // Ok, it's an ICE!
else if (Init->getType()->isScopedEnumeralType() &&
Init->isCXX11ConstantExpr(Context))
; // Ok, it is a scoped-enum constant expression.
else if (Init->isEvaluatable(Context)) {
// If we can constant fold the initializer through heroics, accept it,
// but report this as a use of an extension for -pedantic.
Diag(Loc, diag::ext_in_class_initializer_non_constant)
<< Init->getSourceRange();
} else {
// Otherwise, this is some crazy unknown case. Report the issue at the
// location provided by the isIntegerConstantExpr failed check.
Diag(Loc, diag::err_in_class_initializer_non_constant)
<< Init->getSourceRange();
VDecl->setInvalidDecl();
}
// We allow foldable floating-point constants as an extension.
} else if (DclT->isFloatingType()) { // also permits complex, which is ok
// In C++98, this is a GNU extension. In C++11, it is not, but we support
// it anyway and provide a fixit to add the 'constexpr'.
if (getLangOpts().CPlusPlus11) {
Diag(VDecl->getLocation(),
diag::ext_in_class_initializer_float_type_cxx11)
<< DclT << Init->getSourceRange();
Diag(VDecl->getBeginLoc(),
diag::note_in_class_initializer_float_type_cxx11)
<< FixItHint::CreateInsertion(VDecl->getBeginLoc(), "constexpr ");
} else {
Diag(VDecl->getLocation(), diag::ext_in_class_initializer_float_type)
<< DclT << Init->getSourceRange();
if (!Init->isValueDependent() && !Init->isEvaluatable(Context)) {
Diag(Init->getExprLoc(), diag::err_in_class_initializer_non_constant)
<< Init->getSourceRange();
VDecl->setInvalidDecl();
}
}
// Suggest adding 'constexpr' in C++11 for literal types.
} else if (getLangOpts().CPlusPlus11 && DclT->isLiteralType(Context)) {
Diag(VDecl->getLocation(), diag::err_in_class_initializer_literal_type)
<< DclT << Init->getSourceRange()
<< FixItHint::CreateInsertion(VDecl->getBeginLoc(), "constexpr ");
VDecl->setConstexpr(true);
} else {
Diag(VDecl->getLocation(), diag::err_in_class_initializer_bad_type)
<< DclT << Init->getSourceRange();
VDecl->setInvalidDecl();
}
} else if (VDecl->isFileVarDecl()) {
// In C, extern is typically used to avoid tentative definitions when
// declaring variables in headers, but adding an initializer makes it a
// definition. This is somewhat confusing, so GCC and Clang both warn on it.
// In C++, extern is often used to give implicitly static const variables
// external linkage, so don't warn in that case. If selectany is present,
// this might be header code intended for C and C++ inclusion, so apply the
// C++ rules.
if (VDecl->getStorageClass() == SC_Extern &&
((!getLangOpts().CPlusPlus && !VDecl->hasAttr<SelectAnyAttr>()) ||
!Context.getBaseElementType(VDecl->getType()).isConstQualified()) &&
!(getLangOpts().CPlusPlus && VDecl->isExternC()) &&
!isTemplateInstantiation(VDecl->getTemplateSpecializationKind()))
Diag(VDecl->getLocation(), diag::warn_extern_init);
// In Microsoft C++ mode, a const variable defined in namespace scope has
// external linkage by default if the variable is declared with
// __declspec(dllexport).
if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
getLangOpts().CPlusPlus && VDecl->getType().isConstQualified() &&
VDecl->hasAttr<DLLExportAttr>() && VDecl->getDefinition())
VDecl->setStorageClass(SC_Extern);
// C99 6.7.8p4. All file scoped initializers need to be constant.
if (!getLangOpts().CPlusPlus && !VDecl->isInvalidDecl())
CheckForConstantInitializer(Init, DclT);
}
QualType InitType = Init->getType();
if (!InitType.isNull() &&
(InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
InitType.hasNonTrivialToPrimitiveCopyCUnion()))
checkNonTrivialCUnionInInitializer(Init, Init->getExprLoc());
// We will represent direct-initialization similarly to copy-initialization:
// int x(1); -as-> int x = 1;
// ClassType x(a,b,c); -as-> ClassType x = ClassType(a,b,c);
//
// Clients that want to distinguish between the two forms, can check for
// direct initializer using VarDecl::getInitStyle().
// A major benefit is that clients that don't particularly care which exact
// form it was (like CodeGen) can handle both cases without special-case
// code.
// C++ 8.5p11:
// The form of initialization (using parentheses or '=') is generally
// insignificant, but does matter when the entity being initialized has a
// class type.
if (CXXDirectInit) {
assert(DirectInit && "Call-style initializer must be direct init.");
VDecl->setInitStyle(IsParenListInit ? VarDecl::ParenListInit
: VarDecl::CallInit);
} else if (DirectInit) {
// This must be list-initialization. No other way is direct-initialization.
VDecl->setInitStyle(VarDecl::ListInit);
}
if (LangOpts.OpenMP &&
(LangOpts.OpenMPIsDevice || !LangOpts.OMPTargetTriples.empty()) &&
VDecl->isFileVarDecl())
DeclsToCheckForDeferredDiags.insert(VDecl);
CheckCompleteVariableDeclaration(VDecl);
}
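// Illustrative initializer cases handled above (C++11 rules; a sketch of the
// diagnostics, not a complete matrix):
//
//   struct S {
//     static const int i = 17;        // OK: const integral with a constant expression
//     static const double d = 1.0;    // ext_in_class_initializer_float_type_cxx11
//                                     //   (fix-it suggests adding 'constexpr')
//     static int j = 0;               // err_in_class_initializer_non_const
//     static constexpr int k = 1;     // OK
//   };
//   extern int g = 0;                 // warn_extern_init at file scope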
/// ActOnInitializerError - Given that there was an error parsing an
/// initializer for the given declaration, try to at least re-establish
/// invariants such as whether a variable's type is either dependent or
/// complete.
void Sema::ActOnInitializerError(Decl *D) {
// Our main concern here is re-establishing invariants like "a
// variable's type is either dependent or complete".
if (!D || D->isInvalidDecl()) return;
VarDecl *VD = dyn_cast<VarDecl>(D);
if (!VD) return;
// Bindings are not usable if we can't make sense of the initializer.
if (auto *DD = dyn_cast<DecompositionDecl>(D))
for (auto *BD : DD->bindings())
BD->setInvalidDecl();
// Auto types are meaningless if we can't make sense of the initializer.
if (VD->getType()->isUndeducedType()) {
D->setInvalidDecl();
return;
}
QualType Ty = VD->getType();
if (Ty->isDependentType()) return;
// Require a complete type.
if (RequireCompleteType(VD->getLocation(),
Context.getBaseElementType(Ty),
diag::err_typecheck_decl_incomplete_type)) {
VD->setInvalidDecl();
return;
}
// Require a non-abstract type.
if (RequireNonAbstractType(VD->getLocation(), Ty,
diag::err_abstract_type_in_decl,
AbstractVariableType)) {
VD->setInvalidDecl();
return;
}
// Don't bother complaining about constructors or destructors,
// though.
}
void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
// If there is no declaration, there was an error parsing it. Just ignore it.
if (!RealDecl)
return;
if (VarDecl *Var = dyn_cast<VarDecl>(RealDecl)) {
QualType Type = Var->getType();
// C++1z [dcl.dcl]p1 grammar implies that an initializer is mandatory.
if (isa<DecompositionDecl>(RealDecl)) {
Diag(Var->getLocation(), diag::err_decomp_decl_requires_init) << Var;
Var->setInvalidDecl();
return;
}
if (Type->isUndeducedType() &&
DeduceVariableDeclarationType(Var, false, nullptr))
return;
// C++11 [class.static.data]p3: A static data member can be declared with
// the constexpr specifier; if so, its declaration shall specify
// a brace-or-equal-initializer.
// C++11 [dcl.constexpr]p1: The constexpr specifier shall be applied only to
// the definition of a variable [...] or the declaration of a static data
// member.
if (Var->isConstexpr() && !Var->isThisDeclarationADefinition() &&
!Var->isThisDeclarationADemotedDefinition()) {
if (Var->isStaticDataMember()) {
// C++1z removes the relevant rule; the in-class declaration is always
// a definition there.
if (!getLangOpts().CPlusPlus17 &&
!Context.getTargetInfo().getCXXABI().isMicrosoft()) {
Diag(Var->getLocation(),
diag::err_constexpr_static_mem_var_requires_init)
<< Var;
Var->setInvalidDecl();
return;
}
} else {
Diag(Var->getLocation(), diag::err_invalid_constexpr_var_decl);
Var->setInvalidDecl();
return;
}
}
// OpenCL v1.1 s6.5.3: variables declared in the constant address space must
// be initialized.
if (!Var->isInvalidDecl() &&
Var->getType().getAddressSpace() == LangAS::opencl_constant &&
Var->getStorageClass() != SC_Extern && !Var->getInit()) {
bool HasConstExprDefaultConstructor = false;
if (CXXRecordDecl *RD = Var->getType()->getAsCXXRecordDecl()) {
for (auto *Ctor : RD->ctors()) {
if (Ctor->isConstexpr() && Ctor->getNumParams() == 0 &&
Ctor->getMethodQualifiers().getAddressSpace() ==
LangAS::opencl_constant) {
HasConstExprDefaultConstructor = true;
}
}
}
if (!HasConstExprDefaultConstructor) {
Diag(Var->getLocation(), diag::err_opencl_constant_no_init);
Var->setInvalidDecl();
return;
}
}
if (!Var->isInvalidDecl() && RealDecl->hasAttr<LoaderUninitializedAttr>()) {
if (Var->getStorageClass() == SC_Extern) {
Diag(Var->getLocation(), diag::err_loader_uninitialized_extern_decl)
<< Var;
Var->setInvalidDecl();
return;
}
if (RequireCompleteType(Var->getLocation(), Var->getType(),
diag::err_typecheck_decl_incomplete_type)) {
Var->setInvalidDecl();
return;
}
if (CXXRecordDecl *RD = Var->getType()->getAsCXXRecordDecl()) {
if (!RD->hasTrivialDefaultConstructor()) {
Diag(Var->getLocation(), diag::err_loader_uninitialized_trivial_ctor);
Var->setInvalidDecl();
return;
}
}
// The declaration is uninitialized; no need for further checks.
return;
}
VarDecl::DefinitionKind DefKind = Var->isThisDeclarationADefinition();
if (!Var->isInvalidDecl() && DefKind != VarDecl::DeclarationOnly &&
Var->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion())
checkNonTrivialCUnion(Var->getType(), Var->getLocation(),
NTCUC_DefaultInitializedObject, NTCUK_Init);
switch (DefKind) {
case VarDecl::Definition:
if (!Var->isStaticDataMember() || !Var->getAnyInitializer())
break;
// We have an out-of-line definition of a static data member
// that has an in-class initializer, so we type-check this like
// a declaration.
//
[[fallthrough]];
case VarDecl::DeclarationOnly:
// It's only a declaration.
// Block scope. C99 6.7p7: If an identifier for an object is
// declared with no linkage (C99 6.2.2p6), the type for the
// object shall be complete.
if (!Type->isDependentType() && Var->isLocalVarDecl() &&
!Var->hasLinkage() && !Var->isInvalidDecl() &&
RequireCompleteType(Var->getLocation(), Type,
diag::err_typecheck_decl_incomplete_type))
Var->setInvalidDecl();
// Make sure that the type is not abstract.
if (!Type->isDependentType() && !Var->isInvalidDecl() &&
RequireNonAbstractType(Var->getLocation(), Type,
diag::err_abstract_type_in_decl,
AbstractVariableType))
Var->setInvalidDecl();
if (!Type->isDependentType() && !Var->isInvalidDecl() &&
Var->getStorageClass() == SC_PrivateExtern) {
Diag(Var->getLocation(), diag::warn_private_extern);
Diag(Var->getLocation(), diag::note_private_extern);
}
if (Context.getTargetInfo().allowDebugInfoForExternalRef() &&
!Var->isInvalidDecl() && !getLangOpts().CPlusPlus)
ExternalDeclarations.push_back(Var);
return;
case VarDecl::TentativeDefinition:
// File scope. C99 6.9.2p2: A declaration of an identifier for an
// object that has file scope without an initializer, and without a
// storage-class specifier or with the storage-class specifier "static",
// constitutes a tentative definition. Note: A tentative definition with
// external linkage is valid (C99 6.2.2p5).
if (!Var->isInvalidDecl()) {
if (const IncompleteArrayType *ArrayT
= Context.getAsIncompleteArrayType(Type)) {
if (RequireCompleteSizedType(
Var->getLocation(), ArrayT->getElementType(),
diag::err_array_incomplete_or_sizeless_type))
Var->setInvalidDecl();
} else if (Var->getStorageClass() == SC_Static) {
// C99 6.9.2p3: If the declaration of an identifier for an object is
// a tentative definition and has internal linkage (C99 6.2.2p3), the
// declared type shall not be an incomplete type.
// NOTE: code such as the following
// static struct s;
// struct s { int a; };
// is accepted by gcc. Hence here we issue a warning instead of
// an error and we do not invalidate the static declaration.
// NOTE: to avoid multiple warnings, only check the first declaration.
if (Var->isFirstDecl())
RequireCompleteType(Var->getLocation(), Type,
diag::ext_typecheck_decl_incomplete_type);
}
}
// Record the tentative definition; we're done.
if (!Var->isInvalidDecl())
TentativeDefinitions.push_back(Var);
return;
}
// Provide a specific diagnostic for uninitialized variable
// definitions with incomplete array type.
if (Type->isIncompleteArrayType()) {
if (Var->isConstexpr())
Diag(Var->getLocation(), diag::err_constexpr_var_requires_const_init)
<< Var;
else
Diag(Var->getLocation(),
diag::err_typecheck_incomplete_array_needs_initializer);
Var->setInvalidDecl();
return;
}
// Provide a specific diagnostic for uninitialized variable
// definitions with reference type.
if (Type->isReferenceType()) {
Diag(Var->getLocation(), diag::err_reference_var_requires_init)
<< Var << SourceRange(Var->getLocation(), Var->getLocation());
return;
}
// Do not attempt to type-check the default initializer for a
// variable with dependent type.
if (Type->isDependentType())
return;
if (Var->isInvalidDecl())
return;
if (!Var->hasAttr<AliasAttr>()) {
if (RequireCompleteType(Var->getLocation(),
Context.getBaseElementType(Type),
diag::err_typecheck_decl_incomplete_type)) {
Var->setInvalidDecl();
return;
}
} else {
return;
}
// The variable can not have an abstract class type.
if (RequireNonAbstractType(Var->getLocation(), Type,
diag::err_abstract_type_in_decl,
AbstractVariableType)) {
Var->setInvalidDecl();
return;
}
// Check for jumps past the implicit initializer. C++0x
// clarifies that this applies to a "variable with automatic
// storage duration", not a "local variable".
// C++11 [stmt.dcl]p3
// A program that jumps from a point where a variable with automatic
// storage duration is not in scope to a point where it is in scope is
// ill-formed unless the variable has scalar type, class type with a
// trivial default constructor and a trivial destructor, a cv-qualified
// version of one of these types, or an array of one of the preceding
// types and is declared without an initializer.
if (getLangOpts().CPlusPlus && Var->hasLocalStorage()) {
if (const RecordType *Record
= Context.getBaseElementType(Type)->getAs<RecordType>()) {
CXXRecordDecl *CXXRecord = cast<CXXRecordDecl>(Record->getDecl());
// Mark the function (if we're in one) for further checking even if the
// looser rules of C++11 do not require such checks, so that we can
// diagnose incompatibilities with C++98.
if (!CXXRecord->isPOD())
setFunctionHasBranchProtectedScope();
}
}
// In OpenCL, we can't initialize objects in the __local address space,
// even implicitly, so don't synthesize an implicit initializer.
if (getLangOpts().OpenCL &&
Var->getType().getAddressSpace() == LangAS::opencl_local)
return;
// C++03 [dcl.init]p9:
// If no initializer is specified for an object, and the
// object is of (possibly cv-qualified) non-POD class type (or
// array thereof), the object shall be default-initialized; if
// the object is of const-qualified type, the underlying class
// type shall have a user-declared default
// constructor. Otherwise, if no initializer is specified for
// a non-static object, the object and its subobjects, if
// any, have an indeterminate initial value; if the object
// or any of its subobjects are of const-qualified type, the
// program is ill-formed.
// C++0x [dcl.init]p11:
// If no initializer is specified for an object, the object is
// default-initialized; [...].
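// For illustration (hypothetical): in C++03, 'struct S {}; const S s;' is
// ill-formed because the const object's class has no user-declared default
// constructor.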
InitializedEntity Entity = InitializedEntity::InitializeVariable(Var);
InitializationKind Kind
= InitializationKind::CreateDefault(Var->getLocation());
InitializationSequence InitSeq(*this, Entity, Kind, std::nullopt);
ExprResult Init = InitSeq.Perform(*this, Entity, Kind, std::nullopt);
if (Init.get()) {
Var->setInit(MaybeCreateExprWithCleanups(Init.get()));
// This is important for template substitution.
Var->setInitStyle(VarDecl::CallInit);
} else if (Init.isInvalid()) {
// If default-init fails, attach a recovery-expr initializer to track
// that initialization was attempted and failed.
auto RecoveryExpr =
CreateRecoveryExpr(Var->getLocation(), Var->getLocation(), {});
if (RecoveryExpr.get())
Var->setInit(RecoveryExpr.get());
}
CheckCompleteVariableDeclaration(Var);
}
}
void Sema::ActOnCXXForRangeDecl(Decl *D) {
// If there is no declaration, there was an error parsing it. Ignore it.
if (!D)
return;
VarDecl *VD = dyn_cast<VarDecl>(D);
if (!VD) {
Diag(D->getLocation(), diag::err_for_range_decl_must_be_var);
D->setInvalidDecl();
return;
}
VD->setCXXForRangeDecl(true);
// for-range-declaration cannot be given a storage class specifier.
int Error = -1;
switch (VD->getStorageClass()) {
case SC_None:
break;
case SC_Extern:
Error = 0;
break;
case SC_Static:
Error = 1;
break;
case SC_PrivateExtern:
Error = 2;
break;
case SC_Auto:
Error = 3;
break;
case SC_Register:
Error = 4;
break;
}
// for-range-declaration cannot be given a storage class specifier (continued).
switch (VD->getTSCSpec()) {
case TSCS_thread_local:
Error = 6;
break;
case TSCS___thread:
case TSCS__Thread_local:
case TSCS_unspecified:
break;
}
if (Error != -1) {
Diag(VD->getOuterLocStart(), diag::err_for_range_storage_class)
<< VD << Error;
D->setInvalidDecl();
}
}
StmtResult Sema::ActOnCXXForRangeIdentifier(Scope *S, SourceLocation IdentLoc,
IdentifierInfo *Ident,
ParsedAttributes &Attrs) {
// C++1y [stmt.iter]p1:
// A range-based for statement of the form
// for ( for-range-identifier : for-range-initializer ) statement
// is equivalent to
// for ( auto&& for-range-identifier : for-range-initializer ) statement
DeclSpec DS(Attrs.getPool().getFactory());
const char *PrevSpec;
unsigned DiagID;
DS.SetTypeSpecType(DeclSpec::TST_auto, IdentLoc, PrevSpec, DiagID,
getPrintingPolicy());
Declarator D(DS, ParsedAttributesView::none(), DeclaratorContext::ForInit);
D.SetIdentifier(Ident, IdentLoc);
D.takeAttributes(Attrs);
D.AddTypeInfo(DeclaratorChunk::getReference(0, IdentLoc, /*lvalue*/ false),
IdentLoc);
Decl *Var = ActOnDeclarator(S, D);
cast<VarDecl>(Var)->setCXXForRangeDecl(true);
FinalizeDeclaration(Var);
return ActOnDeclStmt(FinalizeDeclaratorGroup(S, DS, Var), IdentLoc,
Attrs.Range.getEnd().isValid() ? Attrs.Range.getEnd()
: IdentLoc);
}
void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
if (var->isInvalidDecl()) return;
MaybeAddCUDAConstantAttr(var);
if (getLangOpts().OpenCL) {
// OpenCL v2.0 s6.12.5 - Every block variable declaration must have an
// initializer.
if (var->getTypeSourceInfo()->getType()->isBlockPointerType() &&
!var->hasInit()) {
Diag(var->getLocation(), diag::err_opencl_invalid_block_declaration)
<< 1 /*Init*/;
var->setInvalidDecl();
return;
}
}
// In Objective-C, don't allow jumps past the implicit initialization of a
// local retaining variable.
if (getLangOpts().ObjC &&
var->hasLocalStorage()) {
switch (var->getType().getObjCLifetime()) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Autoreleasing:
break;
case Qualifiers::OCL_Weak:
case Qualifiers::OCL_Strong:
setFunctionHasBranchProtectedScope();
break;
}
}
if (var->hasLocalStorage() &&
var->getType().isDestructedType() == QualType::DK_nontrivial_c_struct)
setFunctionHasBranchProtectedScope();
// Warn about externally-visible variables being defined without a
// prior declaration. We only want to do this for global
// declarations, but we also specifically need to avoid doing it for
// class members because the linkage of an anonymous class can
// change if it's later given a typedef name.
if (var->isThisDeclarationADefinition() &&
var->getDeclContext()->getRedeclContext()->isFileContext() &&
var->isExternallyVisible() && var->hasLinkage() &&
!var->isInline() && !var->getDescribedVarTemplate() &&
!isa<VarTemplatePartialSpecializationDecl>(var) &&
!isTemplateInstantiation(var->getTemplateSpecializationKind()) &&
!getDiagnostics().isIgnored(diag::warn_missing_variable_declarations,
var->getLocation())) {
// Find a previous declaration that's not a definition.
VarDecl *prev = var->getPreviousDecl();
while (prev && prev->isThisDeclarationADefinition())
prev = prev->getPreviousDecl();
if (!prev) {
Diag(var->getLocation(), diag::warn_missing_variable_declarations) << var;
Diag(var->getTypeSpecStartLoc(), diag::note_static_for_internal_linkage)
<< /* variable */ 0;
}
}
// Cache the result of checking for constant initialization.
std::optional<bool> CacheHasConstInit;
const Expr *CacheCulprit = nullptr;
auto checkConstInit = [&]() mutable {
if (!CacheHasConstInit)
CacheHasConstInit = var->getInit()->isConstantInitializer(
Context, var->getType()->isReferenceType(), &CacheCulprit);
return *CacheHasConstInit;
};
if (var->getTLSKind() == VarDecl::TLS_Static) {
if (var->getType().isDestructedType()) {
// GNU C++98 edits for __thread, [basic.start.term]p3:
// The type of an object with thread storage duration shall not
// have a non-trivial destructor.
Diag(var->getLocation(), diag::err_thread_nontrivial_dtor);
if (getLangOpts().CPlusPlus11)
Diag(var->getLocation(), diag::note_use_thread_local);
} else if (getLangOpts().CPlusPlus && var->hasInit()) {
if (!checkConstInit()) {
// GNU C++98 edits for __thread, [basic.start.init]p4:
// An object of thread storage duration shall not require dynamic
// initialization.
// FIXME: Need strict checking here.
Diag(CacheCulprit->getExprLoc(), diag::err_thread_dynamic_init)
<< CacheCulprit->getSourceRange();
if (getLangOpts().CPlusPlus11)
Diag(var->getLocation(), diag::note_use_thread_local);
}
}
}
if (!var->getType()->isStructureType() && var->hasInit() &&
isa<InitListExpr>(var->getInit())) {
const auto *ILE = cast<InitListExpr>(var->getInit());
unsigned NumInits = ILE->getNumInits();
if (NumInits > 2)
for (unsigned I = 0; I < NumInits; ++I) {
const auto *Init = ILE->getInit(I);
if (!Init)
break;
const auto *SL = dyn_cast<StringLiteral>(Init->IgnoreImpCasts());
if (!SL)
break;
unsigned NumConcat = SL->getNumConcatenated();
// Diagnose missing comma in string array initialization.
// Do not warn when all the elements in the initializer are concatenated
// together. Also do not warn when the string literal comes from a macro.
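// For illustration (hypothetical): warn on
//   const char *Names[] = {"aa" "bb", "cc", "dd"};
// where the concatenation "aa" "bb" likely hides a missing ','.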
if (NumConcat == 2 && !SL->getBeginLoc().isMacroID()) {
bool OnlyOneMissingComma = true;
for (unsigned J = I + 1; J < NumInits; ++J) {
const auto *Init = ILE->getInit(J);
if (!Init)
break;
const auto *SLJ = dyn_cast<StringLiteral>(Init->IgnoreImpCasts());
if (!SLJ || SLJ->getNumConcatenated() > 1) {
OnlyOneMissingComma = false;
break;
}
}
if (OnlyOneMissingComma) {
SmallVector<FixItHint, 1> Hints;
for (unsigned i = 0; i < NumConcat - 1; ++i)
Hints.push_back(FixItHint::CreateInsertion(
PP.getLocForEndOfToken(SL->getStrTokenLoc(i)), ","));
Diag(SL->getStrTokenLoc(1),
diag::warn_concatenated_literal_array_init)
<< Hints;
Diag(SL->getBeginLoc(),
diag::note_concatenated_string_literal_silence);
}
// In any case, stop now.
break;
}
}
}
QualType type = var->getType();
if (var->hasAttr<BlocksAttr>())
getCurFunction()->addByrefBlockVar(var);
Expr *Init = var->getInit();
bool GlobalStorage = var->hasGlobalStorage();
bool IsGlobal = GlobalStorage && !var->isStaticLocal();
QualType baseType = Context.getBaseElementType(type);
bool HasConstInit = true;
// Check whether the initializer is sufficiently constant.
if (getLangOpts().CPlusPlus && !type->isDependentType() && Init &&
!Init->isValueDependent() &&
(GlobalStorage || var->isConstexpr() ||
var->mightBeUsableInConstantExpressions(Context))) {
// If this variable might have a constant initializer or might be usable in
// constant expressions, check whether or not it actually is now. We can't
// do this lazily, because the result might depend on things that change
// later, such as which constexpr functions happen to be defined.
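// For illustration (hypothetical): whether 'int g = f();' at namespace
// scope has a constant initializer can flip once
// 'constexpr int f() { return 0; }' becomes defined, so compute and cache
// the answer now.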
SmallVector<PartialDiagnosticAt, 8> Notes;
if (!getLangOpts().CPlusPlus11) {
// Prior to C++11, in contexts where a constant initializer is required,
// the set of valid constant initializers is described by syntactic rules
// in [expr.const]p2-6.
// FIXME: Stricter checking for these rules would be useful for constinit /
// -Wglobal-constructors.
HasConstInit = checkConstInit();
// Compute and cache the constant value, and remember that we have a
// constant initializer.
if (HasConstInit) {
(void)var->checkForConstantInitialization(Notes);
Notes.clear();
} else if (CacheCulprit) {
Notes.emplace_back(CacheCulprit->getExprLoc(),
PDiag(diag::note_invalid_subexpr_in_const_expr));
Notes.back().second << CacheCulprit->getSourceRange();
}
} else {
// Evaluate the initializer to see if it's a constant initializer.
HasConstInit = var->checkForConstantInitialization(Notes);
}
if (HasConstInit) {
// FIXME: Consider replacing the initializer with a ConstantExpr.
} else if (var->isConstexpr()) {
SourceLocation DiagLoc = var->getLocation();
// If the note doesn't add any useful information other than a source
// location, fold it into the primary diagnostic.
if (Notes.size() == 1 && Notes[0].second.getDiagID() ==
diag::note_invalid_subexpr_in_const_expr) {
DiagLoc = Notes[0].first;
Notes.clear();
}
Diag(DiagLoc, diag::err_constexpr_var_requires_const_init)
<< var << Init->getSourceRange();
for (unsigned I = 0, N = Notes.size(); I != N; ++I)
Diag(Notes[I].first, Notes[I].second);
} else if (GlobalStorage && var->hasAttr<ConstInitAttr>()) {
auto *Attr = var->getAttr<ConstInitAttr>();
Diag(var->getLocation(), diag::err_require_constant_init_failed)
<< Init->getSourceRange();
Diag(Attr->getLocation(), diag::note_declared_required_constant_init_here)
<< Attr->getRange() << Attr->isConstinit();
for (auto &it : Notes)
Diag(it.first, it.second);
} else if (IsGlobal &&
!getDiagnostics().isIgnored(diag::warn_global_constructor,
var->getLocation())) {
// Warn about globals which don't have a constant initializer. Don't
// warn about globals with a non-trivial destructor because we already
// warned about them.
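// For illustration (hypothetical): 'Widget W("name");' at namespace scope
// triggers -Wglobal-constructors when its initializer is not constant.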
CXXRecordDecl *RD = baseType->getAsCXXRecordDecl();
if (!(RD && !RD->hasTrivialDestructor())) {
// checkConstInit() here permits trivial default initialization even in
// C++11 and later, where such an initializer is not a constant initializer
// but nonetheless doesn't require a global constructor.
if (!checkConstInit())
Diag(var->getLocation(), diag::warn_global_constructor)
<< Init->getSourceRange();
}
}
}
// Apply section attributes and pragmas to global variables.
if (GlobalStorage && var->isThisDeclarationADefinition() &&
!inTemplateInstantiation()) {
PragmaStack<StringLiteral *> *Stack = nullptr;
int SectionFlags = ASTContext::PSF_Read;
if (var->getType().isConstQualified()) {
if (HasConstInit)
Stack = &ConstSegStack;
else {
Stack = &BSSSegStack;
SectionFlags |= ASTContext::PSF_Write;
}
} else if (var->hasInit() && HasConstInit) {
Stack = &DataSegStack;
SectionFlags |= ASTContext::PSF_Write;
} else {
Stack = &BSSSegStack;
SectionFlags |= ASTContext::PSF_Write;
}
if (const SectionAttr *SA = var->getAttr<SectionAttr>()) {
if (SA->getSyntax() == AttributeCommonInfo::AS_Declspec)
SectionFlags |= ASTContext::PSF_Implicit;
UnifySection(SA->getName(), SectionFlags, var);
} else if (Stack->CurrentValue) {
SectionFlags |= ASTContext::PSF_Implicit;
auto SectionName = Stack->CurrentValue->getString();
var->addAttr(SectionAttr::CreateImplicit(
Context, SectionName, Stack->CurrentPragmaLocation,
AttributeCommonInfo::AS_Pragma, SectionAttr::Declspec_allocate));
if (UnifySection(SectionName, SectionFlags, var))
var->dropAttr<SectionAttr>();
}
// Apply the init_seg attribute if this has an initializer. If the
// initializer turns out to not be dynamic, we'll end up ignoring this
// attribute.
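// For illustration (hypothetical, MSVC extension):
//   #pragma init_seg(lib)
//   Widget W;   // 'W' gets an implicit InitSegAttr if its init is dynamic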
if (CurInitSeg && var->getInit())
var->addAttr(InitSegAttr::CreateImplicit(Context, CurInitSeg->getString(),
CurInitSegLoc,
AttributeCommonInfo::AS_Pragma));
}
// All the following checks are C++ only.
if (!getLangOpts().CPlusPlus) {
// If this variable must be emitted, add it as an initializer for the
// current module.
if (Context.DeclMustBeEmitted(var) && !ModuleScopes.empty())
Context.addModuleInitializer(ModuleScopes.back().Module, var);
return;
}
// Require the destructor.
if (!type->isDependentType())
if (const RecordType *recordType = baseType->getAs<RecordType>())
FinalizeVarWithDestructor(var, recordType);
// If this variable must be emitted, add it as an initializer for the current
// module.
if (Context.DeclMustBeEmitted(var) && !ModuleScopes.empty())
Context.addModuleInitializer(ModuleScopes.back().Module, var);
// Build the bindings if this is a structured binding declaration.
if (auto *DD = dyn_cast<DecompositionDecl>(var))
CheckCompleteDecompositionDeclaration(DD);
}
/// Check if VD needs to be dllexport/dllimport due to being in a
/// dllexport/import function.
void Sema::CheckStaticLocalForDllExport(VarDecl *VD) {
assert(VD->isStaticLocal());
auto *FD = dyn_cast_or_null<FunctionDecl>(VD->getParentFunctionOrMethod());
// Find the outermost function when VD is inside a lambda.
while (FD && !getDLLAttr(FD) &&
!FD->hasAttr<DLLExportStaticLocalAttr>() &&
!FD->hasAttr<DLLImportStaticLocalAttr>()) {
FD = dyn_cast_or_null<FunctionDecl>(FD->getParentFunctionOrMethod());
}
if (!FD)
return;
// Static locals inherit dll attributes from their function.
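// For illustration (hypothetical):
//   __declspec(dllexport) void f() { static int x; }
// gives 'x' an inherited dllexport attribute.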
if (Attr *A = getDLLAttr(FD)) {
auto *NewAttr = cast<InheritableAttr>(A->clone(getASTContext()));
NewAttr->setInherited(true);
VD->addAttr(NewAttr);
} else if (Attr *A = FD->getAttr<DLLExportStaticLocalAttr>()) {
auto *NewAttr = DLLExportAttr::CreateImplicit(getASTContext(), *A);
NewAttr->setInherited(true);
VD->addAttr(NewAttr);
// Export this function to enforce exporting this static variable even
// if it is not used in this compilation unit.
if (!FD->hasAttr<DLLExportAttr>())
FD->addAttr(NewAttr);
} else if (Attr *A = FD->getAttr<DLLImportStaticLocalAttr>()) {
auto *NewAttr = DLLImportAttr::CreateImplicit(getASTContext(), *A);
NewAttr->setInherited(true);
VD->addAttr(NewAttr);
}
}
void Sema::CheckThreadLocalForLargeAlignment(VarDecl *VD) {
assert(VD->getTLSKind());
// Perform TLS alignment check here after attributes attached to the variable
// which may affect the alignment have been processed. Only perform the check
// if the target has a maximum TLS alignment (zero means no constraints).
if (unsigned MaxAlign = Context.getTargetInfo().getMaxTLSAlign()) {
// Protect the check so that it's not performed on dependent types and
// dependent alignments (we can't determine the alignment in that case).
if (!VD->hasDependentAlignment()) {
CharUnits MaxAlignChars = Context.toCharUnitsFromBits(MaxAlign);
if (Context.getDeclAlign(VD) > MaxAlignChars) {
Diag(VD->getLocation(), diag::err_tls_var_aligned_over_maximum)
<< (unsigned)Context.getDeclAlign(VD).getQuantity() << VD
<< (unsigned)MaxAlignChars.getQuantity();
}
}
}
}
/// FinalizeDeclaration - called by ParseDeclarationAfterDeclarator to perform
/// any semantic actions necessary after any initializer has been attached.
void Sema::FinalizeDeclaration(Decl *ThisDecl) {
// Note that we are no longer parsing the initializer for this declaration.
ParsingInitForAutoVars.erase(ThisDecl);
VarDecl *VD = dyn_cast_or_null<VarDecl>(ThisDecl);
if (!VD)
return;
// Apply an implicit SectionAttr if '#pragma clang section bss|data|rodata' is active
if (VD->hasGlobalStorage() && VD->isThisDeclarationADefinition() &&
!inTemplateInstantiation() && !VD->hasAttr<SectionAttr>()) {
if (PragmaClangBSSSection.Valid)
VD->addAttr(PragmaClangBSSSectionAttr::CreateImplicit(
Context, PragmaClangBSSSection.SectionName,
PragmaClangBSSSection.PragmaLocation,
AttributeCommonInfo::AS_Pragma));
if (PragmaClangDataSection.Valid)
VD->addAttr(PragmaClangDataSectionAttr::CreateImplicit(
Context, PragmaClangDataSection.SectionName,
PragmaClangDataSection.PragmaLocation,
AttributeCommonInfo::AS_Pragma));
if (PragmaClangRodataSection.Valid)
VD->addAttr(PragmaClangRodataSectionAttr::CreateImplicit(
Context, PragmaClangRodataSection.SectionName,
PragmaClangRodataSection.PragmaLocation,
AttributeCommonInfo::AS_Pragma));
if (PragmaClangRelroSection.Valid)
VD->addAttr(PragmaClangRelroSectionAttr::CreateImplicit(
Context, PragmaClangRelroSection.SectionName,
PragmaClangRelroSection.PragmaLocation,
AttributeCommonInfo::AS_Pragma));
}
if (auto *DD = dyn_cast<DecompositionDecl>(ThisDecl)) {
for (auto *BD : DD->bindings()) {
FinalizeDeclaration(BD);
}
}
checkAttributesAfterMerging(*this, *VD);
if (VD->isStaticLocal())
CheckStaticLocalForDllExport(VD);
if (VD->getTLSKind())
CheckThreadLocalForLargeAlignment(VD);
// Perform check for initializers of device-side global variables.
// CUDA allows empty constructors as initializers (see E.2.3.1, CUDA
// 7.5). We must also apply the same checks to all __shared__
// variables whether they are local or not. CUDA also allows
// constant initializers for __constant__ and __device__ variables.
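// For illustration (hypothetical CUDA):
//   __device__ int d = 42;   // ok: constant initializer
//   __shared__ int s = 1;    // rejected: __shared__ cannot be initialized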
if (getLangOpts().CUDA)
checkAllowedCUDAInitializer(VD);
// Grab the dllimport or dllexport attribute off of the VarDecl.
const InheritableAttr *DLLAttr = getDLLAttr(VD);
// Imported static data members cannot be defined out-of-line.
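// For illustration (hypothetical):
//   struct __declspec(dllimport) S { static int x; };
//   int S::x;   // error; demoted to a warning for class template members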
if (const auto *IA = dyn_cast_or_null<DLLImportAttr>(DLLAttr)) {
if (VD->isStaticDataMember() && VD->isOutOfLine() &&
VD->isThisDeclarationADefinition()) {
// We allow definitions of dllimport class template static data members
// with a warning.
CXXRecordDecl *Context =
cast<CXXRecordDecl>(VD->getFirstDecl()->getDeclContext());
bool IsClassTemplateMember =
isa<ClassTemplatePartialSpecializationDecl>(Context) ||
Context->getDescribedClassTemplate();
Diag(VD->getLocation(),
IsClassTemplateMember
? diag::warn_attribute_dllimport_static_field_definition
: diag::err_attribute_dllimport_static_field_definition);
Diag(IA->getLocation(), diag::note_attribute);
if (!IsClassTemplateMember)
VD->setInvalidDecl();
}
}
// dllimport/dllexport variables cannot be thread local, their TLS index
// isn't exported with the variable.
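// For illustration (hypothetical): '__declspec(dllimport) __thread int t;'
// is rejected here.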
if (DLLAttr && VD->getTLSKind()) {
auto *F = dyn_cast_or_null<FunctionDecl>(VD->getParentFunctionOrMethod());
if (F && getDLLAttr(F)) {
assert(VD->isStaticLocal());
// But if this is a static local in a dllimport/dllexport function, the
// function will never be inlined, which means the var would never be
// imported, so having it marked import/export is safe.
} else {
Diag(VD->getLocation(), diag::err_attribute_dll_thread_local) << VD
<< DLLAttr;
VD->setInvalidDecl();
}
}
if (UsedAttr *Attr = VD->getAttr<UsedAttr>()) {
if (!Attr->isInherited() && !VD->isThisDeclarationADefinition()) {
Diag(Attr->getLocation(), diag::warn_attribute_ignored_on_non_definition)
<< Attr;
VD->dropAttr<UsedAttr>();
}
}
if (RetainAttr *Attr = VD->getAttr<RetainAttr>()) {
if (!Attr->isInherited() && !VD->isThisDeclarationADefinition()) {
Diag(Attr->getLocation(), diag::warn_attribute_ignored_on_non_definition)
<< Attr;
VD->dropAttr<RetainAttr>();
}
}
const DeclContext *DC = VD->getDeclContext();
// If there's a #pragma GCC visibility in scope, and this isn't a class
// member, set the visibility of this variable.
if (DC->getRedeclContext()->isFileContext() && VD->isExternallyVisible())
AddPushedVisibilityAttribute(VD);
// FIXME: Warn on unused var template partial specializations.
if (VD->isFileVarDecl() && !isa<VarTemplatePartialSpecializationDecl>(VD))
MarkUnusedFileScopedDecl(VD);
// Now we have parsed the initializer and can update the table of magic
// tag values.
if (!VD->hasAttr<TypeTagForDatatypeAttr>() ||
!VD->getType()->isIntegralOrEnumerationType())
return;
for (const auto *I : ThisDecl->specific_attrs<TypeTagForDatatypeAttr>()) {
const Expr *MagicValueExpr = VD->getInit();
if (!MagicValueExpr) {
continue;
}
std::optional<llvm::APSInt> MagicValueInt;
if (!(MagicValueInt = MagicValueExpr->getIntegerConstantExpr(Context))) {
Diag(I->getRange().getBegin(),
diag::err_type_tag_for_datatype_not_ice)
<< LangOpts.CPlusPlus << MagicValueExpr->getSourceRange();
continue;
}
if (MagicValueInt->getActiveBits() > 64) {
Diag(I->getRange().getBegin(),
diag::err_type_tag_for_datatype_too_large)
<< LangOpts.CPlusPlus << MagicValueExpr->getSourceRange();
continue;
}
uint64_t MagicValue = MagicValueInt->getZExtValue();
RegisterTypeTagForDatatype(I->getArgumentKind(),
MagicValue,
I->getMatchingCType(),
I->getLayoutCompatible(),
I->getMustBeNull());
}
}
static bool hasDeducedAuto(DeclaratorDecl *DD) {
auto *VD = dyn_cast<VarDecl>(DD);
return VD && !VD->getType()->hasAutoForTrailingReturnType();
}
Sema::DeclGroupPtrTy Sema::FinalizeDeclaratorGroup(Scope *S, const DeclSpec &DS,
ArrayRef<Decl *> Group) {
SmallVector<Decl*, 8> Decls;
if (DS.isTypeSpecOwned())
Decls.push_back(DS.getRepAsDecl());
DeclaratorDecl *FirstDeclaratorInGroup = nullptr;
DecompositionDecl *FirstDecompDeclaratorInGroup = nullptr;
bool DiagnosedMultipleDecomps = false;
DeclaratorDecl *FirstNonDeducedAutoInGroup = nullptr;
bool DiagnosedNonDeducedAuto = false;
for (unsigned i = 0, e = Group.size(); i != e; ++i) {
if (Decl *D = Group[i]) {
// For declarators, there are some additional syntactic-ish checks we need
// to perform.
if (auto *DD = dyn_cast<DeclaratorDecl>(D)) {
if (!FirstDeclaratorInGroup)
FirstDeclaratorInGroup = DD;
if (!FirstDecompDeclaratorInGroup)
FirstDecompDeclaratorInGroup = dyn_cast<DecompositionDecl>(D);
if (!FirstNonDeducedAutoInGroup && DS.hasAutoTypeSpec() &&
!hasDeducedAuto(DD))
FirstNonDeducedAutoInGroup = DD;
if (FirstDeclaratorInGroup != DD) {
// A decomposition declaration cannot be combined with any other
// declaration in the same group.
if (FirstDecompDeclaratorInGroup && !DiagnosedMultipleDecomps) {
Diag(FirstDecompDeclaratorInGroup->getLocation(),
diag::err_decomp_decl_not_alone)
<< FirstDeclaratorInGroup->getSourceRange()
<< DD->getSourceRange();
DiagnosedMultipleDecomps = true;
}
// A declarator that uses 'auto' in any way other than to declare a
// variable with a deduced type cannot be combined with any other
// declarator in the same group.
if (FirstNonDeducedAutoInGroup && !DiagnosedNonDeducedAuto) {
Diag(FirstNonDeducedAutoInGroup->getLocation(),
diag::err_auto_non_deduced_not_alone)
<< FirstNonDeducedAutoInGroup->getType()
->hasAutoForTrailingReturnType()
<< FirstDeclaratorInGroup->getSourceRange()
<< DD->getSourceRange();
DiagnosedNonDeducedAuto = true;
}
}
}
Decls.push_back(D);
}
}
if (DeclSpec::isDeclRep(DS.getTypeSpecType())) {
if (TagDecl *Tag = dyn_cast_or_null<TagDecl>(DS.getRepAsDecl())) {
handleTagNumbering(Tag, S);
if (FirstDeclaratorInGroup && !Tag->hasNameForLinkage() &&
getLangOpts().CPlusPlus)
Context.addDeclaratorForUnnamedTagDecl(Tag, FirstDeclaratorInGroup);
}
}
return BuildDeclaratorGroup(Decls);
}
/// BuildDeclaratorGroup - convert a list of declarations into a declaration
/// group, performing any necessary semantic checking.
Sema::DeclGroupPtrTy
Sema::BuildDeclaratorGroup(MutableArrayRef<Decl *> Group) {
// C++14 [dcl.spec.auto]p7: (DR1347)
// If the type that replaces the placeholder type is not the same in each
// deduction, the program is ill-formed.
if (Group.size() > 1) {
QualType Deduced;
VarDecl *DeducedDecl = nullptr;
for (unsigned i = 0, e = Group.size(); i != e; ++i) {
VarDecl *D = dyn_cast<VarDecl>(Group[i]);
if (!D || D->isInvalidDecl())
break;
DeducedType *DT = D->getType()->getContainedDeducedType();
if (!DT || DT->getDeducedType().isNull())
continue;
if (Deduced.isNull()) {
Deduced = DT->getDeducedType();
DeducedDecl = D;
} else if (!Context.hasSameType(DT->getDeducedType(), Deduced)) {
auto *AT = dyn_cast<AutoType>(DT);
auto Dia = Diag(D->getTypeSourceInfo()->getTypeLoc().getBeginLoc(),
diag::err_auto_different_deductions)
<< (AT ? (unsigned)AT->getKeyword() : 3) << Deduced
<< DeducedDecl->getDeclName() << DT->getDeducedType()
<< D->getDeclName();
if (DeducedDecl->hasInit())
Dia << DeducedDecl->getInit()->getSourceRange();
if (D->getInit())
Dia << D->getInit()->getSourceRange();
D->setInvalidDecl();
break;
}
}
}
ActOnDocumentableDecls(Group);
return DeclGroupPtrTy::make(
DeclGroupRef::Create(Context, Group.data(), Group.size()));
}
void Sema::ActOnDocumentableDecl(Decl *D) {
ActOnDocumentableDecls(D);
}
void Sema::ActOnDocumentableDecls(ArrayRef<Decl *> Group) {
// Don't parse the comment if Doxygen diagnostics are ignored.
if (Group.empty() || !Group[0])
return;
if (Diags.isIgnored(diag::warn_doc_param_not_found,
Group[0]->getLocation()) &&
Diags.isIgnored(diag::warn_unknown_comment_command_name,
Group[0]->getLocation()))
return;
if (Group.size() >= 2) {
// This is a decl group. Normally it will contain only declarations
// produced from a declarator list. But in case we have any definitions or
// additional declaration references:
// 'typedef struct S {} S;'
// 'typedef struct S *S;'
// 'struct S *pS;'
// FinalizeDeclaratorGroup adds these as separate declarations.
Decl *MaybeTagDecl = Group[0];
if (MaybeTagDecl && isa<TagDecl>(MaybeTagDecl)) {
Group = Group.slice(1);
}
}
// FIXME: We assume every Decl in the group is in the same file.
// This is false when the preprocessor constructs the group from decls in
// different files (e.g. macros or #include).
Context.attachCommentsToJustParsedDecls(Group, &getPreprocessor());
}
/// Common checks for a parameter-declaration that should apply to both function
/// parameters and non-type template parameters.
void Sema::CheckFunctionOrTemplateParamDeclarator(Scope *S, Declarator &D) {
// Check that there are no default arguments inside the type of this
// parameter.
if (getLangOpts().CPlusPlus)
CheckExtraCXXDefaultArguments(D);
// Parameter declarators cannot be qualified (C++ [dcl.meaning]p1).
if (D.getCXXScopeSpec().isSet()) {
Diag(D.getIdentifierLoc(), diag::err_qualified_param_declarator)
<< D.getCXXScopeSpec().getRange();
}
// [dcl.meaning]p1: An unqualified-id occurring in a declarator-id shall be a
// simple identifier except [...irrelevant cases...].
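// For illustration (hypothetical): 'void f(int operator+);' is rejected
// below because the declarator-id is not a simple identifier.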
switch (D.getName().getKind()) {
case UnqualifiedIdKind::IK_Identifier:
break;
case UnqualifiedIdKind::IK_OperatorFunctionId:
case UnqualifiedIdKind::IK_ConversionFunctionId:
case UnqualifiedIdKind::IK_LiteralOperatorId:
case UnqualifiedIdKind::IK_ConstructorName:
case UnqualifiedIdKind::IK_DestructorName:
case UnqualifiedIdKind::IK_ImplicitSelfParam:
case UnqualifiedIdKind::IK_DeductionGuideName:
Diag(D.getIdentifierLoc(), diag::err_bad_parameter_name)
<< GetNameForDeclarator(D).getName();
break;
case UnqualifiedIdKind::IK_TemplateId:
case UnqualifiedIdKind::IK_ConstructorTemplateId:
// GetNameForDeclarator would not produce a useful name in this case.
Diag(D.getIdentifierLoc(), diag::err_bad_parameter_name_template_id);
break;
}
}
/// ActOnParamDeclarator - Called from Parser::ParseFunctionDeclarator()
/// to introduce parameters into function prototype scope.
Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D) {
const DeclSpec &DS = D.getDeclSpec();
// Verify C99 6.7.5.3p2: The only SCS allowed is 'register'.
// C++03 [dcl.stc]p2 also permits 'auto'.
StorageClass SC = SC_None;
if (DS.getStorageClassSpec() == DeclSpec::SCS_register) {
SC = SC_Register;
// In C++11, the 'register' storage class specifier is deprecated.
// In C++17, it is not allowed, but we tolerate it as an extension.
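// For illustration (hypothetical): 'void f(register int i);' warns as
// deprecated in C++11/14 and is accepted only as an extension in C++17.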
if (getLangOpts().CPlusPlus11) {
Diag(DS.getStorageClassSpecLoc(),
getLangOpts().CPlusPlus17 ? diag::ext_register_storage_class
: diag::warn_deprecated_register)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
}
} else if (getLangOpts().CPlusPlus &&
DS.getStorageClassSpec() == DeclSpec::SCS_auto) {
SC = SC_Auto;
} else if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified) {
Diag(DS.getStorageClassSpecLoc(),
diag::err_invalid_storage_class_in_func_decl);
D.getMutableDeclSpec().ClearStorageClassSpecs();
}
if (DeclSpec::TSCS TSCS = DS.getThreadStorageClassSpec())
Diag(DS.getThreadStorageClassSpecLoc(), diag::err_invalid_thread)
<< DeclSpec::getSpecifierName(TSCS);
if (DS.isInlineSpecified())
Diag(DS.getInlineSpecLoc(), diag::err_inline_non_function)
<< getLangOpts().CPlusPlus17;
if (DS.hasConstexprSpecifier())
Diag(DS.getConstexprSpecLoc(), diag::err_invalid_constexpr)
<< 0 << static_cast<int>(D.getDeclSpec().getConstexprSpecifier());
DiagnoseFunctionSpecifiers(DS);
CheckFunctionOrTemplateParamDeclarator(S, D);
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType parmDeclType = TInfo->getType();
// Check for redeclaration of parameters, e.g. int foo(int x, int x);
IdentifierInfo *II = D.getIdentifier();
if (II) {
LookupResult R(*this, II, D.getIdentifierLoc(), LookupOrdinaryName,
ForVisibleRedeclaration);
LookupName(R, S);
if (R.isSingleResult()) {
NamedDecl *PrevDecl = R.getFoundDecl();
if (PrevDecl->isTemplateParameter()) {
// Maybe we will complain about the shadowed template parameter.
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(), PrevDecl);
// Just pretend that we didn't see the previous declaration.
PrevDecl = nullptr;
} else if (S->isDeclScope(PrevDecl)) {
Diag(D.getIdentifierLoc(), diag::err_param_redefinition) << II;
Diag(PrevDecl->getLocation(), diag::note_previous_declaration);
// Recover by removing the name
II = nullptr;
D.SetIdentifier(nullptr, D.getIdentifierLoc());
D.setInvalidType(true);
}
}
}
// Temporarily put parameter variables in the translation unit, not
// the enclosing context. This prevents them from accidentally
// looking like class members in C++.
ParmVarDecl *New =
CheckParameter(Context.getTranslationUnitDecl(), D.getBeginLoc(),
D.getIdentifierLoc(), II, parmDeclType, TInfo, SC);
if (D.isInvalidType())
New->setInvalidDecl();
assert(S->isFunctionPrototypeScope());
assert(S->getFunctionPrototypeDepth() >= 1);
New->setScopeInfo(S->getFunctionPrototypeDepth() - 1,
S->getNextFunctionPrototypeIndex());
// Add the parameter declaration into this scope.
S->AddDecl(New);
if (II)
IdResolver.AddDecl(New);
ProcessDeclAttributes(S, New, D);
if (D.getDeclSpec().isModulePrivateSpecified())
Diag(New->getLocation(), diag::err_module_private_local)
<< 1 << New << SourceRange(D.getDeclSpec().getModulePrivateSpecLoc())
<< FixItHint::CreateRemoval(D.getDeclSpec().getModulePrivateSpecLoc());
if (New->hasAttr<BlocksAttr>()) {
Diag(New->getLocation(), diag::err_block_on_nonlocal);
}
if (getLangOpts().OpenCL)
deduceOpenCLAddressSpace(New);
return New;
}
/// Synthesizes a variable for a parameter arising from a
/// typedef.
ParmVarDecl *Sema::BuildParmVarDeclForTypedef(DeclContext *DC,
SourceLocation Loc,
QualType T) {
/* FIXME: setting StartLoc == Loc.
Would it be worthwhile to modify callers so as to provide a proper source
location for unnamed parameters, embedding the parameter's type? */
ParmVarDecl *Param = ParmVarDecl::Create(Context, DC, Loc, Loc, nullptr,
T, Context.getTrivialTypeSourceInfo(T, Loc),
SC_None, nullptr);
Param->setImplicit();
return Param;
}
void Sema::DiagnoseUnusedParameters(ArrayRef<ParmVarDecl *> Parameters) {
// Don't diagnose unused-parameter errors in template instantiations; we
// will already have done so in the template itself.
if (inTemplateInstantiation())
return;
for (const ParmVarDecl *Parameter : Parameters) {
if (!Parameter->isReferenced() && Parameter->getDeclName() &&
!Parameter->hasAttr<UnusedAttr>()) {
Diag(Parameter->getLocation(), diag::warn_unused_parameter)
<< Parameter->getDeclName();
}
}
}
void Sema::DiagnoseSizeOfParametersAndReturnValue(
ArrayRef<ParmVarDecl *> Parameters, QualType ReturnTy, NamedDecl *D) {
if (LangOpts.NumLargeByValueCopy == 0) // No check.
return;
// Warn if the return value is pass-by-value and larger than the specified
// threshold.
if (!ReturnTy->isDependentType() && ReturnTy.isPODType(Context)) {
unsigned Size = Context.getTypeSizeInChars(ReturnTy).getQuantity();
if (Size > LangOpts.NumLargeByValueCopy)
Diag(D->getLocation(), diag::warn_return_value_size) << D << Size;
}
// Warn if any parameter is pass-by-value and larger than the specified
// threshold.
for (const ParmVarDecl *Parameter : Parameters) {
QualType T = Parameter->getType();
if (T->isDependentType() || !T.isPODType(Context))
continue;
unsigned Size = Context.getTypeSizeInChars(T).getQuantity();
if (Size > LangOpts.NumLargeByValueCopy)
Diag(Parameter->getLocation(), diag::warn_parameter_size)
<< Parameter << Size;
}
}
ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc,
SourceLocation NameLoc, IdentifierInfo *Name,
QualType T, TypeSourceInfo *TSInfo,
StorageClass SC) {
// In ARC, infer a lifetime qualifier for appropriate parameter types.
if (getLangOpts().ObjCAutoRefCount &&
T.getObjCLifetime() == Qualifiers::OCL_None &&
T->isObjCLifetimeType()) {
Qualifiers::ObjCLifetime lifetime;
// Special cases for arrays:
// - if it's const, use __unsafe_unretained
// - otherwise, it's an error
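// For illustration (hypothetical ObjC ARC):
//   void f(id objs[]);         // error: array of retainable type
//   void f(const id objs[]);   // ok: implicitly __unsafe_unretained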
if (T->isArrayType()) {
if (!T.isConstQualified()) {
if (DelayedDiagnostics.shouldDelayDiagnostics())
DelayedDiagnostics.add(
sema::DelayedDiagnostic::makeForbiddenType(
NameLoc, diag::err_arc_array_param_no_ownership, T, false));
else
Diag(NameLoc, diag::err_arc_array_param_no_ownership)
<< TSInfo->getTypeLoc().getSourceRange();
}
lifetime = Qualifiers::OCL_ExplicitNone;
} else {
lifetime = T->getObjCARCImplicitLifetime();
}
T = Context.getLifetimeQualifiedType(T, lifetime);
}
ParmVarDecl *New = ParmVarDecl::Create(Context, DC, StartLoc, NameLoc, Name,
Context.getAdjustedParameterType(T),
TSInfo, SC, nullptr);
// Make a note if we created a new pack in the scope of a lambda, so that
// we know that references to that pack must also be expanded within the
// lambda scope.
if (New->isParameterPack())
if (auto *LSI = getEnclosingLambda())
LSI->LocalPacks.push_back(New);
if (New->getType().hasNonTrivialToPrimitiveDestructCUnion() ||
New->getType().hasNonTrivialToPrimitiveCopyCUnion())
checkNonTrivialCUnion(New->getType(), New->getLocation(),
NTCUC_FunctionParam, NTCUK_Destruct|NTCUK_Copy);
// Parameters cannot be abstract class types.
// For record types, this is done by the AbstractClassUsageDiagnoser once
// the class has been completely parsed.
if (!CurContext->isRecord() &&
RequireNonAbstractType(NameLoc, T, diag::err_abstract_type_in_decl,
AbstractParamType))
New->setInvalidDecl();
// Parameter declarators cannot be interface types. All ObjC objects are
// passed by reference.
if (T->isObjCObjectType()) {
SourceLocation TypeEndLoc =
getLocForEndOfToken(TSInfo->getTypeLoc().getEndLoc());
Diag(NameLoc,
diag::err_object_cannot_be_passed_returned_by_value) << 1 << T
<< FixItHint::CreateInsertion(TypeEndLoc, "*");
T = Context.getObjCObjectPointerType(T);
New->setType(T);
}
// ISO/IEC TR 18037 S6.7.3: "The type of an object with automatic storage
// duration shall not be qualified by an address-space qualifier."
// Since all parameters have automatic storage duration, they cannot have
// an address space.
if (T.getAddressSpace() != LangAS::Default &&
// OpenCL allows function arguments declared to be an array of a type
// to be qualified with an address space.
!(getLangOpts().OpenCL &&
(T->isArrayType() || T.getAddressSpace() == LangAS::opencl_private))) {
Diag(NameLoc, diag::err_arg_with_address_space);
New->setInvalidDecl();
}
// PPC MMA non-pointer types are not allowed as function argument types.
if (Context.getTargetInfo().getTriple().isPPC64() &&
CheckPPCMMAType(New->getOriginalType(), New->getLocation())) {
New->setInvalidDecl();
}
return New;
}
void Sema::ActOnFinishKNRParamDeclarations(Scope *S, Declarator &D,
SourceLocation LocAfterDecls) {
DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
// C99 6.9.1p6 "If a declarator includes an identifier list, each declaration
// in the declaration list shall have at least one declarator, those
// declarators shall only declare identifiers from the identifier list, and
// every identifier in the identifier list shall be declared."
//
// C89 3.7.1p5 "If a declarator includes an identifier list, only the
// identifiers it names shall be declared in the declaration list."
//
// This is why we only diagnose in C99 and later. Note, the other conditions
// listed are checked elsewhere.
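// For illustration (hypothetical K&R C):
//   int f(a, b) int a; { return a + b; }
// leaves 'b' undeclared; in C99 mode we diagnose it and then implicitly
// give 'b' type 'int' below.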
if (!FTI.hasPrototype) {
for (int i = FTI.NumParams; i != 0; /* decrement in loop */) {
--i;
if (FTI.Params[i].Param == nullptr) {
if (getLangOpts().C99) {
SmallString<256> Code;
llvm::raw_svector_ostream(Code)
<< " int " << FTI.Params[i].Ident->getName() << ";\n";
Diag(FTI.Params[i].IdentLoc, diag::ext_param_not_declared)
<< FTI.Params[i].Ident
<< FixItHint::CreateInsertion(LocAfterDecls, Code);
}
// Implicitly declare the argument as type 'int' for lack of a better
// type.
AttributeFactory attrs;
DeclSpec DS(attrs);
const char* PrevSpec; // unused
unsigned DiagID; // unused
DS.SetTypeSpecType(DeclSpec::TST_int, FTI.Params[i].IdentLoc, PrevSpec,
DiagID, Context.getPrintingPolicy());
// Use the identifier location for the type source range.
DS.SetRangeStart(FTI.Params[i].IdentLoc);
DS.SetRangeEnd(FTI.Params[i].IdentLoc);
Declarator ParamD(DS, ParsedAttributesView::none(),
DeclaratorContext::KNRTypeList);
ParamD.SetIdentifier(FTI.Params[i].Ident, FTI.Params[i].IdentLoc);
FTI.Params[i].Param = ActOnParamDeclarator(S, ParamD);
}
}
}
}
Decl *
Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Declarator &D,
MultiTemplateParamsArg TemplateParameterLists,
SkipBodyInfo *SkipBody, FnBodyKind BodyKind) {
assert(getCurFunctionDecl() == nullptr && "Function parsing confused");
assert(D.isFunctionDeclarator() && "Not a function declarator!");
Scope *ParentScope = FnBodyScope->getParent();
// Check if we are in an `omp begin/end declare variant` scope. If we are, and
// we define a non-templated function definition, we will create a declaration
// instead (=BaseFD), and emit the definition with a mangled name afterwards.
// The base function declaration will have the equivalent of an `omp declare
// variant` annotation which specifies the mangled definition as a
// specialization function under the OpenMP context defined as part of the
// `omp begin declare variant`.
SmallVector<FunctionDecl *, 4> Bases;
if (LangOpts.OpenMP && isInOpenMPDeclareVariantScope())
ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(
ParentScope, D, TemplateParameterLists, Bases);
D.setFunctionDefinitionKind(FunctionDefinitionKind::Definition);
Decl *DP = HandleDeclarator(ParentScope, D, TemplateParameterLists);
Decl *Dcl = ActOnStartOfFunctionDef(FnBodyScope, DP, SkipBody, BodyKind);
if (!Bases.empty())
ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(Dcl, Bases);
return Dcl;
}
void Sema::ActOnFinishInlineFunctionDef(FunctionDecl *D) {
Consumer.HandleInlineFunctionDefinition(D);
}
static bool FindPossiblePrototype(const FunctionDecl *FD,
const FunctionDecl *&PossiblePrototype) {
for (const FunctionDecl *Prev = FD->getPreviousDecl(); Prev;
Prev = Prev->getPreviousDecl()) {
// Ignore any declarations that occur in function or method
// scope, because they aren't visible from the header.
if (Prev->getLexicalDeclContext()->isFunctionOrMethod())
continue;
PossiblePrototype = Prev;
return Prev->getType()->isFunctionProtoType();
}
return false;
}
static bool
ShouldWarnAboutMissingPrototype(const FunctionDecl *FD,
const FunctionDecl *&PossiblePrototype) {
// Don't warn about invalid declarations.
if (FD->isInvalidDecl())
return false;
// Or declarations that aren't global.
if (!FD->isGlobal())
return false;
// Don't warn about C++ member functions.
if (isa<CXXMethodDecl>(FD))
return false;
// Don't warn about 'main'.
if (isa<TranslationUnitDecl>(FD->getDeclContext()->getRedeclContext()))
if (IdentifierInfo *II = FD->getIdentifier())
if (II->isStr("main") || II->isStr("efi_main"))
return false;
// Don't warn about inline functions.
if (FD->isInlined())
return false;
// Don't warn about function templates.
if (FD->getDescribedFunctionTemplate())
return false;
// Don't warn about function template specializations.
if (FD->isFunctionTemplateSpecialization())
return false;
// Don't warn for OpenCL kernels.
if (FD->hasAttr<OpenCLKernelAttr>())
return false;
// Don't warn on explicitly deleted functions.
if (FD->isDeleted())
return false;
// Don't warn on functions that aren't externally visible, e.g. those made
// implicitly local by having parameters of local type.
if (!FD->isExternallyVisible())
return false;
// If we were able to find a potential prototype, don't warn.
if (FindPossiblePrototype(FD, PossiblePrototype))
return false;
return true;
}
void
Sema::CheckForFunctionRedefinition(FunctionDecl *FD,
const FunctionDecl *EffectiveDefinition,
SkipBodyInfo *SkipBody) {
const FunctionDecl *Definition = EffectiveDefinition;
if (!Definition &&
!FD->isDefined(Definition, /*CheckForPendingFriendDefinition*/ true))
return;
if (Definition->getFriendObjectKind() != Decl::FOK_None) {
if (FunctionDecl *OrigDef = Definition->getInstantiatedFromMemberFunction()) {
if (FunctionDecl *OrigFD = FD->getInstantiatedFromMemberFunction()) {
// A merged copy of the same function, instantiated as a member of
// the same class, is OK.
if (declaresSameEntity(OrigFD, OrigDef) &&
declaresSameEntity(cast<Decl>(Definition->getLexicalDeclContext()),
cast<Decl>(FD->getLexicalDeclContext())))
return;
}
}
}
if (canRedefineFunction(Definition, getLangOpts()))
return;
// Don't emit an error when this is a redefinition of a typo-corrected
// definition.
if (TypoCorrectedFunctionDefinitions.count(Definition))
return;
// If we don't have a visible definition of the function, and it's inline or
// a template, skip the new definition.
if (SkipBody && !hasVisibleDefinition(Definition) &&
(Definition->getFormalLinkage() == InternalLinkage ||
Definition->isInlined() ||
Definition->getDescribedFunctionTemplate() ||
Definition->getNumTemplateParameterLists())) {
SkipBody->ShouldSkip = true;
SkipBody->Previous = const_cast<FunctionDecl*>(Definition);
if (auto *TD = Definition->getDescribedFunctionTemplate())
makeMergedDefinitionVisible(TD);
makeMergedDefinitionVisible(const_cast<FunctionDecl*>(Definition));
return;
}
if (getLangOpts().GNUMode && Definition->isInlineSpecified() &&
Definition->getStorageClass() == SC_Extern)
Diag(FD->getLocation(), diag::err_redefinition_extern_inline)
<< FD << getLangOpts().CPlusPlus;
else
Diag(FD->getLocation(), diag::err_redefinition) << FD;
Diag(Definition->getLocation(), diag::note_previous_definition);
FD->setInvalidDecl();
}
static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
Sema &S) {
CXXRecordDecl *const LambdaClass = CallOperator->getParent();
LambdaScopeInfo *LSI = S.PushLambdaScope();
LSI->CallOperator = CallOperator;
LSI->Lambda = LambdaClass;
LSI->ReturnType = CallOperator->getReturnType();
const LambdaCaptureDefault LCD = LambdaClass->getLambdaCaptureDefault();
if (LCD == LCD_None)
LSI->ImpCaptureStyle = CapturingScopeInfo::ImpCap_None;
else if (LCD == LCD_ByCopy)
LSI->ImpCaptureStyle = CapturingScopeInfo::ImpCap_LambdaByval;
else if (LCD == LCD_ByRef)
LSI->ImpCaptureStyle = CapturingScopeInfo::ImpCap_LambdaByref;
DeclarationNameInfo DNI = CallOperator->getNameInfo();
LSI->IntroducerRange = DNI.getCXXOperatorNameRange();
LSI->Mutable = !CallOperator->isConst();
// Add the captures to the LSI so they can be noted as already
// captured within tryCaptureVar.
auto I = LambdaClass->field_begin();
for (const auto &C : LambdaClass->captures()) {
if (C.capturesVariable()) {
ValueDecl *VD = C.getCapturedVar();
if (VD->isInitCapture())
S.CurrentInstantiationScope->InstantiatedLocal(VD, VD);
const bool ByRef = C.getCaptureKind() == LCK_ByRef;
LSI->addCapture(VD, /*IsBlock*/false, ByRef,
/*RefersToEnclosingVariableOrCapture*/true, C.getLocation(),
/*EllipsisLoc*/C.isPackExpansion()
? C.getEllipsisLoc() : SourceLocation(),
I->getType(), /*Invalid*/false);
} else if (C.capturesThis()) {
LSI->addThisCapture(/*Nested*/ false, C.getLocation(), I->getType(),
C.getCaptureKind() == LCK_StarThis);
} else {
LSI->addVLATypeCapture(C.getLocation(), I->getCapturedVLAType(),
I->getType());
}
++I;
}
}
Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
SkipBodyInfo *SkipBody,
FnBodyKind BodyKind) {
if (!D) {
// Parsing the function declaration failed in some way. Push on a fake scope
// anyway so we can try to parse the function body.
PushFunctionScope();
PushExpressionEvaluationContext(ExprEvalContexts.back().Context);
return D;
}
FunctionDecl *FD = nullptr;
if (FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(D))
FD = FunTmpl->getTemplatedDecl();
else
FD = cast<FunctionDecl>(D);
// Do not push if it is a lambda because one is already pushed when building
// the lambda in ActOnStartOfLambdaDefinition().
if (!isLambdaCallOperator(FD))
// [expr.const]/p14.1
// An expression or conversion is in an immediate function context if it is
// potentially evaluated and either: its innermost enclosing non-block scope
// is a function parameter scope of an immediate function.
PushExpressionEvaluationContext(
FD->isConsteval() ? ExpressionEvaluationContext::ImmediateFunctionContext
: ExprEvalContexts.back().Context);
// Check for defining attributes before the check for redefinition.
if (const auto *Attr = FD->getAttr<AliasAttr>()) {
Diag(Attr->getLocation(), diag::err_alias_is_definition) << FD << 0;
FD->dropAttr<AliasAttr>();
FD->setInvalidDecl();
}
if (const auto *Attr = FD->getAttr<IFuncAttr>()) {
Diag(Attr->getLocation(), diag::err_alias_is_definition) << FD << 1;
FD->dropAttr<IFuncAttr>();
FD->setInvalidDecl();
}
if (const auto *Attr = FD->getAttr<TargetVersionAttr>()) {
if (!Context.getTargetInfo().hasFeature("fmv") &&
!Attr->isDefaultVersion()) {
// If function multiversioning is disabled, skip parsing the body of a
// function defined with a non-default target_version attribute.
if (SkipBody)
SkipBody->ShouldSkip = true;
return nullptr;
}
}
if (auto *Ctor = dyn_cast<CXXConstructorDecl>(FD)) {
if (Ctor->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&
Ctor->isDefaultConstructor() &&
Context.getTargetInfo().getCXXABI().isMicrosoft()) {
// If this is an MS ABI dllexport default constructor, instantiate any
// default arguments.
InstantiateDefaultCtorDefaultArgs(Ctor);
}
}
// See if this is a redefinition. If 'will have body' (or similar) is already
// set, then these checks were already performed when it was set.
if (!FD->willHaveBody() && !FD->isLateTemplateParsed() &&
!FD->isThisDeclarationInstantiatedFromAFriendDefinition()) {
CheckForFunctionRedefinition(FD, nullptr, SkipBody);
// If we're skipping the body, we're done. Don't enter the scope.
if (SkipBody && SkipBody->ShouldSkip)
return D;
}
// Mark this function as "will have a body eventually". This lets users
// call e.g. isInlineDefinitionExternallyVisible while we're still parsing
// this function.
FD->setWillHaveBody();
// If we are instantiating a generic lambda call operator, push
// a LambdaScopeInfo onto the function stack. But use the information
// that's already been calculated (ActOnLambdaExpr) to prime the current
// LambdaScopeInfo.
// When the template operator is being specialized, the LambdaScopeInfo
// has to be properly restored so that tryCaptureVariable doesn't try
// and capture any new variables. In addition when calculating potential
// captures during transformation of nested lambdas, it is necessary to
// have the LSI properly restored.
if (isGenericLambdaCallOperatorSpecialization(FD)) {
assert(inTemplateInstantiation() &&
"There should be an active template instantiation on the stack "
"when instantiating a generic lambda!");
RebuildLambdaScopeInfo(cast<CXXMethodDecl>(D), *this);
} else {
// Enter a new function scope
PushFunctionScope();
}
// Builtin functions cannot be defined.
if (unsigned BuiltinID = FD->getBuiltinID()) {
if (!Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID) &&
!Context.BuiltinInfo.isPredefinedRuntimeFunction(BuiltinID)) {
Diag(FD->getLocation(), diag::err_builtin_definition) << FD;
FD->setInvalidDecl();
}
}
// The return type of a function definition must be complete (C99 6.9.1p3),
// unless the function is deleted (C++ specific, C++ [dcl.fct.def.general]p2)
QualType ResultType = FD->getReturnType();
if (!ResultType->isDependentType() && !ResultType->isVoidType() &&
!FD->isInvalidDecl() && BodyKind != FnBodyKind::Delete &&
RequireCompleteType(FD->getLocation(), ResultType,
diag::err_func_def_incomplete_result))
FD->setInvalidDecl();
if (FnBodyScope)
PushDeclContext(FnBodyScope, FD);
// Check the validity of our function parameters
if (BodyKind != FnBodyKind::Delete)
CheckParmsForFunctionDef(FD->parameters(),
/*CheckParameterNames=*/true);
// Add non-parameter declarations already in the function to the current
// scope.
if (FnBodyScope) {
for (Decl *NPD : FD->decls()) {
auto *NonParmDecl = dyn_cast<NamedDecl>(NPD);
if (!NonParmDecl)
continue;
assert(!isa<ParmVarDecl>(NonParmDecl) &&
"parameters should not be in newly created FD yet");
// If the decl has a name, make it accessible in the current scope.
if (NonParmDecl->getDeclName())
PushOnScopeChains(NonParmDecl, FnBodyScope, /*AddToContext=*/false);
// Similarly, dive into enums and fish their constants out, making them
// accessible in this scope.
if (auto *ED = dyn_cast<EnumDecl>(NonParmDecl)) {
for (auto *EI : ED->enumerators())
PushOnScopeChains(EI, FnBodyScope, /*AddToContext=*/false);
}
}
}
// Introduce our parameters into the function scope
for (auto *Param : FD->parameters()) {
Param->setOwningFunction(FD);
// If this has an identifier, add it to the scope stack.
if (Param->getIdentifier() && FnBodyScope) {
CheckShadow(FnBodyScope, Param);
PushOnScopeChains(Param, FnBodyScope);
}
}
// C++ [module.import/6] external definitions are not permitted in header
// units. Deleted and Defaulted functions are implicitly inline (but the
// inline state is not set at this point, so check the BodyKind explicitly).
// FIXME: Consider an alternate location for the test where the inlined()
// state is complete.
if (getLangOpts().CPlusPlusModules && currentModuleIsHeaderUnit() &&
!FD->isInvalidDecl() && !FD->isInlined() &&
BodyKind != FnBodyKind::Delete && BodyKind != FnBodyKind::Default &&
FD->getFormalLinkage() == Linkage::ExternalLinkage &&
!FD->isTemplated() && !FD->isTemplateInstantiation()) {
assert(FD->isThisDeclarationADefinition());
Diag(FD->getLocation(), diag::err_extern_def_in_header_unit);
FD->setInvalidDecl();
}
// Ensure that the function's exception specification is instantiated.
if (const FunctionProtoType *FPT = FD->getType()->getAs<FunctionProtoType>())
ResolveExceptionSpec(D->getLocation(), FPT);
// dllimport cannot be applied to non-inline function definitions.
if (FD->hasAttr<DLLImportAttr>() && !FD->isInlined() &&
!FD->isTemplateInstantiation()) {
assert(!FD->hasAttr<DLLExportAttr>());
Diag(FD->getLocation(), diag::err_attribute_dllimport_function_definition);
FD->setInvalidDecl();
return D;
}
// We want to attach documentation to original Decl (which might be
// a function template).
ActOnDocumentableDecl(D);
if (getCurLexicalContext()->isObjCContainer() &&
getCurLexicalContext()->getDeclKind() != Decl::ObjCCategoryImpl &&
getCurLexicalContext()->getDeclKind() != Decl::ObjCImplementation)
Diag(FD->getLocation(), diag::warn_function_def_in_objc_container);
return D;
}
/// Given the set of return statements within a function body,
/// compute the variables that are subject to the named return value
/// optimization.
///
/// Each of the variables that is subject to the named return value
/// optimization will be marked as NRVO variables in the AST, and any
/// return statement that has a marked NRVO variable as its NRVO candidate can
/// use the named return value optimization.
///
/// This function applies a very simplistic algorithm for NRVO: if every return
/// statement in the scope of a variable has the same NRVO candidate, that
/// candidate is an NRVO variable.
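///
/// For illustration (hypothetical): in
/// \code
///   X f(bool b) { X x; if (b) return x; return x; }
/// \endcode
/// every return names 'x', so 'x' can remain an NRVO candidate.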
void Sema::computeNRVO(Stmt *Body, FunctionScopeInfo *Scope) {
ReturnStmt **Returns = Scope->Returns.data();
for (unsigned I = 0, E = Scope->Returns.size(); I != E; ++I) {
if (const VarDecl *NRVOCandidate = Returns[I]->getNRVOCandidate()) {
if (!NRVOCandidate->isNRVOVariable())
Returns[I]->setNRVOCandidate(nullptr);
}
}
}
bool Sema::canDelayFunctionBody(const Declarator &D) {
// We can't delay parsing the body of a constexpr function template (yet).
if (D.getDeclSpec().hasConstexprSpecifier())
return false;
// We can't delay parsing the body of a function template with a deduced
// return type (yet).
if (D.getDeclSpec().hasAutoTypeSpec()) {
// If the placeholder introduces a non-deduced trailing return type,
// we can still delay parsing it.
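// For illustration (hypothetical): 'auto f() -> int { ... }' can still be
// delayed, while plain 'auto f() { ... }' cannot.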
if (D.getNumTypeObjects()) {
const auto &Outer = D.getTypeObject(D.getNumTypeObjects() - 1);
if (Outer.Kind == DeclaratorChunk::Function &&
Outer.Fun.hasTrailingReturnType()) {
QualType Ty = GetTypeFromParser(Outer.Fun.getTrailingReturnType());
return Ty.isNull() || !Ty->isUndeducedType();
}
}
return false;
}
return true;
}
bool Sema::canSkipFunctionBody(Decl *D) {
// We cannot skip the body of a function (or function template) which is
// constexpr, since we may need to evaluate its body in order to parse the
// rest of the file.
// We cannot skip the body of a function with an undeduced return type,
// because any callers of that function need to know the type.
if (const FunctionDecl *FD = D->getAsFunction()) {
if (FD->isConstexpr())
return false;
// We can't simply call Type::isUndeducedType here, because inside template
// auto can be deduced to a dependent type, which is not considered
// "undeduced".
if (FD->getReturnType()->getContainedDeducedType())
return false;
}
return Consumer.shouldSkipFunctionBody(D);
}
Decl *Sema::ActOnSkippedFunctionBody(Decl *Decl) {
if (!Decl)
return nullptr;
if (FunctionDecl *FD = Decl->getAsFunction())
FD->setHasSkippedBody();
else if (ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(Decl))
MD->setHasSkippedBody();
return Decl;
}
Decl *Sema::ActOnFinishFunctionBody(Decl *D, Stmt *BodyArg) {
return ActOnFinishFunctionBody(D, BodyArg, false);
}
/// RAII object that pops an ExpressionEvaluationContext when exiting a function
/// body.
class ExitFunctionBodyRAII {
public:
ExitFunctionBodyRAII(Sema &S, bool IsLambda) : S(S), IsLambda(IsLambda) {}
~ExitFunctionBodyRAII() {
if (!IsLambda)
S.PopExpressionEvaluationContext();
}
private:
Sema &S;
bool IsLambda = false;
};
static void diagnoseImplicitlyRetainedSelf(Sema &S) {
llvm::DenseMap<const BlockDecl *, bool> EscapeInfo;
auto IsOrNestedInEscapingBlock = [&](const BlockDecl *BD) {
if (EscapeInfo.count(BD))
return EscapeInfo[BD];
bool R = false;
const BlockDecl *CurBD = BD;
do {
R = !CurBD->doesNotEscape();
if (R)
break;
CurBD = CurBD->getParent()->getInnermostBlockDecl();
} while (CurBD);
return EscapeInfo[BD] = R;
};
// If the location where 'self' is implicitly retained is inside an escaping
// block, emit a diagnostic.
for (const std::pair<SourceLocation, const BlockDecl *> &P :
S.ImplicitlyRetainedSelfLocs)
if (IsOrNestedInEscapingBlock(P.second))
S.Diag(P.first, diag::warn_implicitly_retains_self)
<< FixItHint::CreateInsertion(P.first, "self->");
}
Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
bool IsInstantiation) {
FunctionScopeInfo *FSI = getCurFunction();
FunctionDecl *FD = dcl ? dcl->getAsFunction() : nullptr;
if (FSI->UsesFPIntrin && FD && !FD->hasAttr<StrictFPAttr>())
FD->addAttr(StrictFPAttr::CreateImplicit(Context));
sema::AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy();
sema::AnalysisBasedWarnings::Policy *ActivePolicy = nullptr;
if (getLangOpts().Coroutines && FSI->isCoroutine())
CheckCompletedCoroutineBody(FD, Body);
{
// Do not call PopExpressionEvaluationContext() if it is a lambda because
// one is already popped when finishing the lambda in BuildLambdaExpr().
// This is meant to pop the context added in ActOnStartOfFunctionDef().
ExitFunctionBodyRAII ExitRAII(*this, isLambdaCallOperator(FD));
if (FD) {
FD->setBody(Body);
FD->setWillHaveBody(false);
if (getLangOpts().CPlusPlus14) {
if (!FD->isInvalidDecl() && Body && !FD->isDependentContext() &&
FD->getReturnType()->isUndeducedType()) {
// For a function with a deduced result type to return void,
// the result type as written must be 'auto' or 'decltype(auto)',
// possibly cv-qualified or constrained, but not ref-qualified.
if (!FD->getReturnType()->getAs<AutoType>()) {
Diag(dcl->getLocation(), diag::err_auto_fn_no_return_but_not_auto)
<< FD->getReturnType();
FD->setInvalidDecl();
} else {
// Falling off the end of the function is the same as 'return;'.
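// Example (illustrative):
//   auto f() { }           // deduces 'void', as if the body ended in 'return;'
//   auto g() { return 1; } // deduces 'int'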
Expr *Dummy = nullptr;
if (DeduceFunctionTypeFromReturnExpr(
FD, dcl->getLocation(), Dummy,
FD->getReturnType()->getAs<AutoType>()))
FD->setInvalidDecl();
}
}
} else if (getLangOpts().CPlusPlus11 && isLambdaCallOperator(FD)) {
// In C++11, we don't use 'auto' deduction rules for lambda call
// operators because we don't support return type deduction.
auto *LSI = getCurLambda();
if (LSI->HasImplicitReturnType) {
deduceClosureReturnType(*LSI);
// C++11 [expr.prim.lambda]p4:
// [...] if there are no return statements in the compound-statement
// [the deduced type is] the type void
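// Example (illustrative): in C++11, '[] {}' has return type void, while
// '[] { return 42; }' deduces int from its single return statement.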
QualType RetType =
LSI->ReturnType.isNull() ? Context.VoidTy : LSI->ReturnType;
// Update the return type to the deduced type.
const auto *Proto = FD->getType()->castAs<FunctionProtoType>();
FD->setType(Context.getFunctionType(RetType, Proto->getParamTypes(),
Proto->getExtProtoInfo()));
}
}
// If the function implicitly returns zero (like 'main') or is naked,
// don't complain about missing return statements.
if (FD->hasImplicitReturnZero() || FD->hasAttr<NakedAttr>())
WP.disableCheckFallThrough();
// MSVC permits the use of a pure specifier (=0) on a function definition
// at class scope; warn about this non-standard construct.
if (getLangOpts().MicrosoftExt && FD->isPure() && !FD->isOutOfLine())
Diag(FD->getLocation(), diag::ext_pure_function_definition);
if (!FD->isInvalidDecl()) {
// Don't diagnose unused parameters of defaulted, deleted or naked
// functions.
if (!FD->isDeleted() && !FD->isDefaulted() && !FD->hasSkippedBody() &&
!FD->hasAttr<NakedAttr>())
DiagnoseUnusedParameters(FD->parameters());
DiagnoseSizeOfParametersAndReturnValue(FD->parameters(),
FD->getReturnType(), FD);
// If this is a structor, we need a vtable.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(FD))
MarkVTableUsed(FD->getLocation(), Constructor->getParent());
else if (CXXDestructorDecl *Destructor =
dyn_cast<CXXDestructorDecl>(FD))
MarkVTableUsed(FD->getLocation(), Destructor->getParent());
// Try to apply the named return value optimization. We have to check
// if we can do this here because lambdas keep return statements around
// to deduce an implicit return type.
if (FD->getReturnType()->isRecordType() &&
(!getLangOpts().CPlusPlus || !FD->isDependentContext()))
computeNRVO(Body, FSI);
}
// GNU warning -Wmissing-prototypes:
// Warn if a global function is defined without a previous
// prototype declaration. This warning is issued even if the
// definition itself provides a prototype. The aim is to detect
// global functions that fail to be declared in header files.
const FunctionDecl *PossiblePrototype = nullptr;
if (ShouldWarnAboutMissingPrototype(FD, PossiblePrototype)) {
Diag(FD->getLocation(), diag::warn_missing_prototype) << FD;
if (PossiblePrototype) {
// We found a declaration that is not a prototype,
// but that could be a zero-parameter prototype
if (TypeSourceInfo *TI = PossiblePrototype->getTypeSourceInfo()) {
TypeLoc TL = TI->getTypeLoc();
if (FunctionNoProtoTypeLoc FTL = TL.getAs<FunctionNoProtoTypeLoc>())
Diag(PossiblePrototype->getLocation(),
diag::note_declaration_not_a_prototype)
<< (FD->getNumParams() != 0)
<< (FD->getNumParams() == 0 ? FixItHint::CreateInsertion(
FTL.getRParenLoc(), "void")
: FixItHint{});
}
} else {
// Returns true if the token beginning at this Loc is `const`.
auto isLocAtConst = [&](SourceLocation Loc, const SourceManager &SM,
const LangOptions &LangOpts) {
std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
if (LocInfo.first.isInvalid())
return false;
bool Invalid = false;
StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
if (Invalid)
return false;
if (LocInfo.second > Buffer.size())
return false;
const char *LexStart = Buffer.data() + LocInfo.second;
StringRef StartTok(LexStart, Buffer.size() - LocInfo.second);
return StartTok.consume_front("const") &&
(StartTok.empty() || isWhitespace(StartTok[0]) ||
StartTok.startswith("/*") || StartTok.startswith("//"));
};
auto findBeginLoc = [&]() {
// If the return type has `const` qualifier, we want to insert
// `static` before `const` (and not before the typename).
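// Example (illustrative): for
//   const char *f() { ... }
// the fix-it inserts before 'const', producing
//   static const char *f() { ... }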
if ((FD->getReturnType()->isAnyPointerType() &&
FD->getReturnType()->getPointeeType().isConstQualified()) ||
FD->getReturnType().isConstQualified()) {
// But only do this if we can determine where the `const` is.
if (isLocAtConst(FD->getBeginLoc(), getSourceManager(),
getLangOpts()))
return FD->getBeginLoc();
}
return FD->getTypeSpecStartLoc();
};
Diag(FD->getTypeSpecStartLoc(),
diag::note_static_for_internal_linkage)
<< /* function */ 1
<< (FD->getStorageClass() == SC_None
? FixItHint::CreateInsertion(findBeginLoc(), "static ")
: FixItHint{});
}
}
// We might not have found a prototype because we didn't wish to warn
// about the missing prototype. Try again without the checks for whether
// we want to warn about it.
if (!PossiblePrototype)
(void)FindPossiblePrototype(FD, PossiblePrototype);
// If the function being defined does not have a prototype, then we may
// need to diagnose it as changing behavior in C2x because we now know
// whether the function accepts arguments or not. This only handles the
// case where the definition has no prototype but does have parameters
// and either there is no previous potential prototype, or the previous
// potential prototype also has no actual prototype. This handles cases
// like:
// void f(); void f(a) int a; {}
// void g(a) int a; {}
// See MergeFunctionDecl() for other cases of the behavior change
// diagnostic. See GetFullTypeForDeclarator() for handling of a function
// type without a prototype.
if (!FD->hasWrittenPrototype() && FD->getNumParams() != 0 &&
(!PossiblePrototype || (!PossiblePrototype->hasWrittenPrototype() &&
!PossiblePrototype->isImplicit()))) {
// The function definition has parameters, so this will change behavior
// in C2x. If there is a possible prototype, it comes before the
// function definition.
// FIXME: The declaration may have already been diagnosed as being
// deprecated in GetFullTypeForDeclarator() if it had no arguments, but
// there's no way to test for the "changes behavior" condition in
// SemaType.cpp when forming the declaration's function type. So, we do
// this awkward dance instead.
//
// If we have a possible prototype and it declares a function with a
// prototype, we don't want to diagnose it; if we have a possible
// prototype and it has no prototype, it may have already been
// diagnosed in SemaType.cpp as deprecated depending on whether
// -Wstrict-prototypes is enabled. If we already warned about it being
// deprecated, add a note that it also changes behavior. If we didn't
// warn about it being deprecated (because the diagnostic is not
// enabled), warn now that it is deprecated and changes behavior.
// This K&R C function definition definitely changes behavior in C2x,
// so diagnose it.
Diag(FD->getLocation(), diag::warn_non_prototype_changes_behavior)
<< /*definition*/ 1 << /* not supported in C2x */ 0;
// If we have a possible prototype for the function which is a user-
// visible declaration, we already tested that it has no prototype.
// This will change behavior in C2x. This gets a warning rather than a
// note because it's the same behavior-changing problem as with the
// definition.
if (PossiblePrototype)
Diag(PossiblePrototype->getLocation(),
diag::warn_non_prototype_changes_behavior)
<< /*declaration*/ 0 << /* conflicting */ 1 << /*subsequent*/ 1
<< /*definition*/ 1;
}
// Warn on CPUDispatch with an actual body.
if (FD->isMultiVersion() && FD->hasAttr<CPUDispatchAttr>() && Body)
if (const auto *CmpndBody = dyn_cast<CompoundStmt>(Body))
if (!CmpndBody->body_empty())
Diag(CmpndBody->body_front()->getBeginLoc(),
diag::warn_dispatch_body_ignored);
if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
const CXXMethodDecl *KeyFunction;
if (MD->isOutOfLine() && (MD = MD->getCanonicalDecl()) &&
MD->isVirtual() &&
(KeyFunction = Context.getCurrentKeyFunction(MD->getParent())) &&
MD == KeyFunction->getCanonicalDecl()) {
// Update the key-function state if necessary for this ABI.
if (FD->isInlined() &&
!Context.getTargetInfo().getCXXABI().canKeyFunctionBeInline()) {
Context.setNonKeyFunction(MD);
// If the newly-chosen key function is already defined, then we
// need to mark the vtable as used retroactively.
KeyFunction = Context.getCurrentKeyFunction(MD->getParent());
const FunctionDecl *Definition;
if (KeyFunction && KeyFunction->isDefined(Definition))
MarkVTableUsed(Definition->getLocation(), MD->getParent(), true);
} else {
// We just defined the key function; mark the vtable as used.
MarkVTableUsed(FD->getLocation(), MD->getParent(), true);
}
}
}
assert(
(FD == getCurFunctionDecl() || getCurLambda()->CallOperator == FD) &&
"Function parsing confused");
} else if (ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(dcl)) {
assert(MD == getCurMethodDecl() && "Method parsing confused");
MD->setBody(Body);
if (!MD->isInvalidDecl()) {
DiagnoseSizeOfParametersAndReturnValue(MD->parameters(),
MD->getReturnType(), MD);
if (Body)
computeNRVO(Body, FSI);
}
if (FSI->ObjCShouldCallSuper) {
Diag(MD->getEndLoc(), diag::warn_objc_missing_super_call)
<< MD->getSelector().getAsString();
FSI->ObjCShouldCallSuper = false;
}
if (FSI->ObjCWarnForNoDesignatedInitChain) {
const ObjCMethodDecl *InitMethod = nullptr;
bool isDesignated =
MD->isDesignatedInitializerForTheInterface(&InitMethod);
assert(isDesignated && InitMethod);
(void)isDesignated;
auto superIsNSObject = [&](const ObjCMethodDecl *MD) {
auto IFace = MD->getClassInterface();
if (!IFace)
return false;
auto SuperD = IFace->getSuperClass();
if (!SuperD)
return false;
return SuperD->getIdentifier() ==
NSAPIObj->getNSClassId(NSAPI::ClassId_NSObject);
};
// Don't issue this warning for unavailable inits or direct subclasses
// of NSObject.
if (!MD->isUnavailable() && !superIsNSObject(MD)) {
Diag(MD->getLocation(),
diag::warn_objc_designated_init_missing_super_call);
Diag(InitMethod->getLocation(),
diag::note_objc_designated_init_marked_here);
}
FSI->ObjCWarnForNoDesignatedInitChain = false;
}
if (FSI->ObjCWarnForNoInitDelegation) {
// Don't issue this warning for unavailable inits.
if (!MD->isUnavailable())
Diag(MD->getLocation(),
diag::warn_objc_secondary_init_missing_init_call);
FSI->ObjCWarnForNoInitDelegation = false;
}
diagnoseImplicitlyRetainedSelf(*this);
} else {
// Parsing the function declaration failed in some way. Pop the fake scope
// we pushed on.
PopFunctionScopeInfo(ActivePolicy, dcl);
return nullptr;
}
if (Body && FSI->HasPotentialAvailabilityViolations)
DiagnoseUnguardedAvailabilityViolations(dcl);
assert(!FSI->ObjCShouldCallSuper &&
"This should only be set for ObjC methods, which should have been "
"handled in the block above.");
// Verify and clean out per-function state.
if (Body && (!FD || !FD->isDefaulted())) {
// C++ constructors that have function-try-blocks can't have return
// statements in the handlers of that block. (C++ [except.handle]p14)
// Verify this.
if (FD && isa<CXXConstructorDecl>(FD) && isa<CXXTryStmt>(Body))
DiagnoseReturnInConstructorExceptionHandler(cast<CXXTryStmt>(Body));
// Verify that gotos and switch cases don't jump into scopes illegally.
if (FSI->NeedsScopeChecking() && !PP.isCodeCompletionEnabled())
DiagnoseInvalidJumps(Body);
if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(dcl)) {
if (!Destructor->getParent()->isDependentType())
CheckDestructor(Destructor);
MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(),
Destructor->getParent());
}
// If any errors have occurred, clear out any temporaries that may have
// been leftover. This ensures that these temporaries won't be picked up
// for deletion in some later function.
if (hasUncompilableErrorOccurred() ||
getDiagnostics().getSuppressAllDiagnostics()) {
DiscardCleanupsInEvaluationContext();
}
if (!hasUncompilableErrorOccurred() && !isa<FunctionTemplateDecl>(dcl)) {
// Since the body is valid, issue any analysis-based warnings that are
// enabled.
ActivePolicy = &WP;
}
if (!IsInstantiation && FD && FD->isConstexpr() && !FD->isInvalidDecl() &&
!CheckConstexprFunctionDefinition(FD, CheckConstexprKind::Diagnose))
FD->setInvalidDecl();
if (FD && FD->hasAttr<NakedAttr>()) {
for (const Stmt *S : Body->children()) {
// Allow local register variables without initializer as they don't
// require prologue.
bool RegisterVariables = false;
if (auto *DS = dyn_cast<DeclStmt>(S)) {
for (const auto *Decl : DS->decls()) {
if (const auto *Var = dyn_cast<VarDecl>(Decl)) {
RegisterVariables =
Var->hasAttr<AsmLabelAttr>() && !Var->hasInit();
if (!RegisterVariables)
break;
}
}
}
if (RegisterVariables)
continue;
if (!isa<AsmStmt>(S) && !isa<NullStmt>(S)) {
Diag(S->getBeginLoc(), diag::err_non_asm_stmt_in_naked_function);
Diag(FD->getAttr<NakedAttr>()->getLocation(), diag::note_attribute);
FD->setInvalidDecl();
break;
}
}
}
assert(ExprCleanupObjects.size() ==
ExprEvalContexts.back().NumCleanupObjects &&
"Leftover temporaries in function");
assert(!Cleanup.exprNeedsCleanups() &&
"Unaccounted cleanups in function");
assert(MaybeODRUseExprs.empty() &&
"Leftover expressions for odr-use checking");
}
} // Pops the ExitFunctionBodyRAII scope, which needs to happen before we pop
// the declaration context below. Otherwise, we're unable to transform
// 'this' expressions when transforming immediate context functions.
if (!IsInstantiation)
PopDeclContext();
PopFunctionScopeInfo(ActivePolicy, dcl);
// If any errors have occurred, clear out any temporaries that may have
// been leftover. This ensures that these temporaries won't be picked up for
// deletion in some later function.
if (hasUncompilableErrorOccurred()) {
DiscardCleanupsInEvaluationContext();
}
if (FD && ((LangOpts.OpenMP && (LangOpts.OpenMPIsDevice ||
!LangOpts.OMPTargetTriples.empty())) ||
LangOpts.CUDA || LangOpts.SYCLIsDevice)) {
auto ES = getEmissionStatus(FD);
if (ES == Sema::FunctionEmissionStatus::Emitted ||
ES == Sema::FunctionEmissionStatus::Unknown)
DeclsToCheckForDeferredDiags.insert(FD);
}
if (FD && !FD->isDeleted())
checkTypeSupport(FD->getType(), FD->getLocation(), FD);
return dcl;
}
/// When we finish delayed parsing of an attribute, we must attach it to the
/// relevant Decl.
void Sema::ActOnFinishDelayedAttribute(Scope *S, Decl *D,
ParsedAttributes &Attrs) {
// Always attach attributes to the underlying decl.
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
D = TD->getTemplatedDecl();
ProcessDeclAttributeList(S, D, Attrs);
if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(D))
if (Method->isStatic())
checkThisInStaticMemberFunctionAttributes(Method);
}
/// ImplicitlyDefineFunction - An undeclared identifier was used in a function
/// call, forming a call to an implicitly defined function (per C99 6.5.1p2).
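/// e.g. in C89, a call 'foo(42)' with no visible declaration implicitly
/// declares 'int foo();' in the enclosing block scope.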
NamedDecl *Sema::ImplicitlyDefineFunction(SourceLocation Loc,
IdentifierInfo &II, Scope *S) {
// It is not valid to implicitly define a function in C2x.
assert(LangOpts.implicitFunctionsAllowed() &&
"Implicit function declarations aren't allowed in this language mode");
// Find the scope in which the identifier is injected and the corresponding
// DeclContext.
// FIXME: C89 does not say what happens if there is no enclosing block scope.
// In that case, we inject the declaration into the translation unit scope
// instead.
Scope *BlockScope = S;
while (!BlockScope->isCompoundStmtScope() && BlockScope->getParent())
BlockScope = BlockScope->getParent();
Scope *ContextScope = BlockScope;
while (!ContextScope->getEntity())
ContextScope = ContextScope->getParent();
ContextRAII SavedContext(*this, ContextScope->getEntity());
// Before we produce a declaration for an implicitly defined
// function, see whether there was a locally-scoped declaration of
// this name as a function or variable. If so, use that
// (non-visible) declaration, and complain about it.
NamedDecl *ExternCPrev = findLocallyScopedExternCDecl(&II);
if (ExternCPrev) {
// We still need to inject the function into the enclosing block scope so
// that later (non-call) uses can see it.
PushOnScopeChains(ExternCPrev, BlockScope, /*AddToContext*/false);
// C89 footnote 38:
// If in fact it is not defined as having type "function returning int",
// the behavior is undefined.
if (!isa<FunctionDecl>(ExternCPrev) ||
!Context.typesAreCompatible(
cast<FunctionDecl>(ExternCPrev)->getType(),
Context.getFunctionNoProtoType(Context.IntTy))) {
Diag(Loc, diag::ext_use_out_of_scope_declaration)
<< ExternCPrev << !getLangOpts().C99;
Diag(ExternCPrev->getLocation(), diag::note_previous_declaration);
return ExternCPrev;
}
}
// Extension in C99 (defaults to error). Legal in C89, but warn about it.
unsigned diag_id;
if (II.getName().startswith("__builtin_"))
diag_id = diag::warn_builtin_unknown;
// OpenCL v2.0 s6.9.u - Implicit function declaration is not supported.
else if (getLangOpts().C99)
diag_id = diag::ext_implicit_function_decl_c99;
else
diag_id = diag::warn_implicit_function_decl;
TypoCorrection Corrected;
// Because typo correction is expensive, only do it if the implicit
// function declaration is going to be treated as an error.
//
// Perform the correction before issuing the main diagnostic, as some
// consumers use typo-correction callbacks to enhance the main diagnostic.
if (S && !ExternCPrev &&
(Diags.getDiagnosticLevel(diag_id, Loc) >= DiagnosticsEngine::Error)) {
DeclFilterCCC<FunctionDecl> CCC{};
Corrected = CorrectTypo(DeclarationNameInfo(&II, Loc), LookupOrdinaryName,
S, nullptr, CCC, CTK_NonError);
}
Diag(Loc, diag_id) << &II;
if (Corrected) {
// If the correction is going to suggest an implicitly defined function,
// skip the correction as not being a particularly good idea.
bool Diagnose = true;
if (const auto *D = Corrected.getCorrectionDecl())
Diagnose = !D->isImplicit();
if (Diagnose)
diagnoseTypo(Corrected, PDiag(diag::note_function_suggestion),
/*ErrorRecovery*/ false);
}
// If we found a prior declaration of this function, don't bother building
// another one. We've already pushed that one into scope, so there's nothing
// more to do.
if (ExternCPrev)
return ExternCPrev;
// Set a Declarator for the implicit definition: int foo();
const char *Dummy;
AttributeFactory attrFactory;
DeclSpec DS(attrFactory);
unsigned DiagID;
bool Error = DS.SetTypeSpecType(DeclSpec::TST_int, Loc, Dummy, DiagID,
Context.getPrintingPolicy());
(void)Error; // Silence warning.
assert(!Error && "Error setting up implicit decl!");
SourceLocation NoLoc;
Declarator D(DS, ParsedAttributesView::none(), DeclaratorContext::Block);
D.AddTypeInfo(DeclaratorChunk::getFunction(/*HasProto=*/false,
/*IsAmbiguous=*/false,
/*LParenLoc=*/NoLoc,
/*Params=*/nullptr,
/*NumParams=*/0,
/*EllipsisLoc=*/NoLoc,
/*RParenLoc=*/NoLoc,
/*RefQualifierIsLvalueRef=*/true,
/*RefQualifierLoc=*/NoLoc,
/*MutableLoc=*/NoLoc, EST_None,
/*ESpecRange=*/SourceRange(),
/*Exceptions=*/nullptr,
/*ExceptionRanges=*/nullptr,
/*NumExceptions=*/0,
/*NoexceptExpr=*/nullptr,
/*ExceptionSpecTokens=*/nullptr,
/*DeclsInPrototype=*/std::nullopt,
Loc, Loc, D),
std::move(DS.getAttributes()), SourceLocation());
D.SetIdentifier(&II, Loc);
// Insert this function into the enclosing block scope.
FunctionDecl *FD = cast<FunctionDecl>(ActOnDeclarator(BlockScope, D));
FD->setImplicit();
AddKnownFunctionAttributes(FD);
return FD;
}
/// If this function is a C++ replaceable global allocation function
/// (C++2a [basic.stc.dynamic.allocation], C++2a [new.delete]),
/// adds any function attributes that we know a priori based on the standard.
///
/// We need to check for duplicate attributes both here and where user-written
/// attributes are applied to declarations.
void Sema::AddKnownFunctionAttributesForReplaceableGlobalAllocationFunction(
FunctionDecl *FD) {
if (FD->isInvalidDecl())
return;
if (FD->getDeclName().getCXXOverloadedOperator() != OO_New &&
FD->getDeclName().getCXXOverloadedOperator() != OO_Array_New)
return;
std::optional<unsigned> AlignmentParam;
bool IsNothrow = false;
if (!FD->isReplaceableGlobalAllocationFunction(&AlignmentParam, &IsNothrow))
return;
// C++2a [basic.stc.dynamic.allocation]p4:
// An allocation function that has a non-throwing exception specification
// indicates failure by returning a null pointer value. Any other allocation
// function never returns a null pointer value and indicates failure only by
// throwing an exception [...]
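// Example (illustrative): a plain 'operator new(std::size_t)' never
// returns null, so it receives an implicit returns_nonnull attribute;
// the nothrow overload reports failure by returning null and does not.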
if (!IsNothrow && !FD->hasAttr<ReturnsNonNullAttr>())
FD->addAttr(ReturnsNonNullAttr::CreateImplicit(Context, FD->getLocation()));
// C++2a [basic.stc.dynamic.allocation]p2:
// An allocation function attempts to allocate the requested amount of
// storage. [...] If the request succeeds, the value returned by a
// replaceable allocation function is a [...] pointer value p0 different
// from any previously returned value p1 [...]
//
// However, this particular information is being added in codegen,
// because there is an opt-out switch for it (-fno-assume-sane-operator-new).
// C++2a [basic.stc.dynamic.allocation]p2:
// An allocation function attempts to allocate the requested amount of
// storage. If it is successful, it returns the address of the start of a
// block of storage whose length in bytes is at least as large as the
// requested size.
if (!FD->hasAttr<AllocSizeAttr>()) {
FD->addAttr(AllocSizeAttr::CreateImplicit(
Context, /*ElemSizeParam=*/ParamIdx(1, FD),
/*NumElemsParam=*/ParamIdx(), FD->getLocation()));
}
// C++2a [basic.stc.dynamic.allocation]p3:
// For an allocation function [...], the pointer returned on a successful
// call shall represent the address of storage that is aligned as follows:
// (3.1) If the allocation function takes an argument of type
// std::align_val_t, the storage will have the alignment
// specified by the value of this argument.
if (AlignmentParam && !FD->hasAttr<AllocAlignAttr>()) {
FD->addAttr(AllocAlignAttr::CreateImplicit(
Context, ParamIdx(*AlignmentParam, FD), FD->getLocation()));
}
// FIXME:
// C++2a [basic.stc.dynamic.allocation]p3:
// For an allocation function [...], the pointer returned on a successful
// call shall represent the address of storage that is aligned as follows:
// (3.2) Otherwise, if the allocation function is named operator new[],
// the storage is aligned for any object that does not have
// new-extended alignment ([basic.align]) and is no larger than the
// requested size.
// (3.3) Otherwise, the storage is aligned for any object that does not
// have new-extended alignment and is of the requested size.
}
/// Adds any function attributes that we know a priori based on
/// the declaration of this function.
///
/// These attributes can apply both to implicitly-declared builtins
/// (like __builtin___printf_chk) or to library-declared functions
/// like NSLog or printf.
///
/// We need to check for duplicate attributes both here and where user-written
/// attributes are applied to declarations.
void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) {
if (FD->isInvalidDecl())
return;
// If this is a built-in function, map its builtin attributes to
// actual attributes.
if (unsigned BuiltinID = FD->getBuiltinID()) {
// Handle printf-formatting attributes.
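// Example (illustrative): __builtin_printf has FormatIdx 0, yielding
// format(printf, 1, 2); a va_list variant like __builtin_vprintf yields
// format(printf, 1, 0), since there are no variadic arguments to check.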
unsigned FormatIdx;
bool HasVAListArg;
if (Context.BuiltinInfo.isPrintfLike(BuiltinID, FormatIdx, HasVAListArg)) {
if (!FD->hasAttr<FormatAttr>()) {
const char *fmt = "printf";
unsigned int NumParams = FD->getNumParams();
if (FormatIdx < NumParams && // NumParams may be 0 (e.g. vfprintf)
FD->getParamDecl(FormatIdx)->getType()->isObjCObjectPointerType())
fmt = "NSString";
FD->addAttr(FormatAttr::CreateImplicit(Context,
&Context.Idents.get(fmt),
FormatIdx+1,
HasVAListArg ? 0 : FormatIdx+2,
FD->getLocation()));
}
}
if (Context.BuiltinInfo.isScanfLike(BuiltinID, FormatIdx,
HasVAListArg)) {
if (!FD->hasAttr<FormatAttr>())
FD->addAttr(FormatAttr::CreateImplicit(Context,
&Context.Idents.get("scanf"),
FormatIdx+1,
HasVAListArg ? 0 : FormatIdx+2,
FD->getLocation()));
}
// Handle automatically recognized callbacks.
SmallVector<int, 4> Encoding;
if (!FD->hasAttr<CallbackAttr>() &&
Context.BuiltinInfo.performsCallback(BuiltinID, Encoding))
FD->addAttr(CallbackAttr::CreateImplicit(
Context, Encoding.data(), Encoding.size(), FD->getLocation()));
// Mark const if we don't care about errno and/or floating point exceptions
// that are the only thing preventing the function from being const. This
// allows IRgen to use LLVM intrinsics for such functions.
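// Example (illustrative): with -fno-math-errno and FP exceptions ignored,
// a call to 'sqrt' can be marked const and lowered to llvm.sqrt.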
bool NoExceptions =
getLangOpts().getDefaultExceptionMode() == LangOptions::FPE_Ignore;
bool ConstWithoutErrnoAndExceptions =
Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
bool ConstWithoutExceptions =
Context.BuiltinInfo.isConstWithoutExceptions(BuiltinID);
if (!FD->hasAttr<ConstAttr>() &&
(ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
(!ConstWithoutErrnoAndExceptions ||
(!getLangOpts().MathErrno && NoExceptions)) &&
(!ConstWithoutExceptions || NoExceptions))
FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
// We make "fma" on GNU or Windows const because we know it does not set
// errno in those environments even though it could set errno based on the
// C standard.
const llvm::Triple &Trip = Context.getTargetInfo().getTriple();
if ((Trip.isGNUEnvironment() || Trip.isOSMSVCRT()) &&
!FD->hasAttr<ConstAttr>()) {
switch (BuiltinID) {
case Builtin::BI__builtin_fma:
case Builtin::BI__builtin_fmaf:
case Builtin::BI__builtin_fmal:
case Builtin::BIfma:
case Builtin::BIfmaf:
case Builtin::BIfmal:
FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
break;
default:
break;
}
}
if (Context.BuiltinInfo.isReturnsTwice(BuiltinID) &&
!FD->hasAttr<ReturnsTwiceAttr>())
FD->addAttr(ReturnsTwiceAttr::CreateImplicit(Context,
FD->getLocation()));
if (Context.BuiltinInfo.isNoThrow(BuiltinID) && !FD->hasAttr<NoThrowAttr>())
FD->addAttr(NoThrowAttr::CreateImplicit(Context, FD->getLocation()));
if (Context.BuiltinInfo.isPure(BuiltinID) && !FD->hasAttr<PureAttr>())
FD->addAttr(PureAttr::CreateImplicit(Context, FD->getLocation()));
if (Context.BuiltinInfo.isConst(BuiltinID) && !FD->hasAttr<ConstAttr>())
FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
if (getLangOpts().CUDA && Context.BuiltinInfo.isTSBuiltin(BuiltinID) &&
!FD->hasAttr<CUDADeviceAttr>() && !FD->hasAttr<CUDAHostAttr>()) {
// Add the appropriate attribute, depending on the CUDA compilation mode
// and which target the builtin belongs to. For example, during host
// compilation, aux builtins are __device__, while the rest are __host__.
if (getLangOpts().CUDAIsDevice !=
Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
FD->addAttr(CUDADeviceAttr::CreateImplicit(Context, FD->getLocation()));
else
FD->addAttr(CUDAHostAttr::CreateImplicit(Context, FD->getLocation()));
}
// Add known guaranteed alignment for allocation functions.
switch (BuiltinID) {
case Builtin::BImemalign:
case Builtin::BIaligned_alloc:
if (!FD->hasAttr<AllocAlignAttr>())
FD->addAttr(AllocAlignAttr::CreateImplicit(Context, ParamIdx(1, FD),
FD->getLocation()));
break;
default:
break;
}
// Add allocsize attribute for allocation functions.
switch (BuiltinID) {
case Builtin::BIcalloc:
FD->addAttr(AllocSizeAttr::CreateImplicit(
Context, ParamIdx(1, FD), ParamIdx(2, FD), FD->getLocation()));
break;
case Builtin::BImemalign:
case Builtin::BIaligned_alloc:
case Builtin::BIrealloc:
FD->addAttr(AllocSizeAttr::CreateImplicit(Context, ParamIdx(2, FD),
ParamIdx(), FD->getLocation()));
break;
case Builtin::BImalloc:
FD->addAttr(AllocSizeAttr::CreateImplicit(Context, ParamIdx(1, FD),
ParamIdx(), FD->getLocation()));
break;
default:
break;
}
// Add lifetime attribute to std::move, std::forward et al.
switch (BuiltinID) {
case Builtin::BIaddressof:
case Builtin::BI__addressof:
case Builtin::BI__builtin_addressof:
case Builtin::BIas_const:
case Builtin::BIforward:
case Builtin::BImove:
case Builtin::BImove_if_noexcept:
if (ParmVarDecl *P = FD->getParamDecl(0u);
!P->hasAttr<LifetimeBoundAttr>())
P->addAttr(
LifetimeBoundAttr::CreateImplicit(Context, FD->getLocation()));
break;
default:
break;
}
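// Example (illustrative): with the parameter of std::move marked
// lifetimebound, code such as 'int &&r = std::move(MakeTemp());' (where
// MakeTemp is a hypothetical factory returning a temporary) can be
// diagnosed as binding a reference to a temporary that is destroyed at
// the end of the full-expression.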
}
AddKnownFunctionAttributesForReplaceableGlobalAllocationFunction(FD);
// If C++ exceptions are enabled but we are told extern "C" functions cannot
// throw, add an implicit nothrow attribute to any extern "C" function we come
// across.
if (getLangOpts().CXXExceptions && getLangOpts().ExternCNoUnwind &&
FD->isExternC() && !FD->hasAttr<NoThrowAttr>()) {
const auto *FPT = FD->getType()->getAs<FunctionProtoType>();
if (!FPT || FPT->getExceptionSpecType() == EST_None)
FD->addAttr(NoThrowAttr::CreateImplicit(Context, FD->getLocation()));
}
IdentifierInfo *Name = FD->getIdentifier();
if (!Name)
return;
if ((!getLangOpts().CPlusPlus &&
FD->getDeclContext()->isTranslationUnit()) ||
(isa<LinkageSpecDecl>(FD->getDeclContext()) &&
cast<LinkageSpecDecl>(FD->getDeclContext())->getLanguage() ==
LinkageSpecDecl::lang_c)) {
// Okay: this could be a libc/libm/Objective-C function we know
// about.
} else
return;
if (Name->isStr("asprintf") || Name->isStr("vasprintf")) {
// FIXME: asprintf and vasprintf aren't C99 functions. Should they be
// target-specific builtins, perhaps?
if (!FD->hasAttr<FormatAttr>())
FD->addAttr(FormatAttr::CreateImplicit(Context,
&Context.Idents.get("printf"), 2,
Name->isStr("vasprintf") ? 0 : 3,
FD->getLocation()));
}
if (Name->isStr("__CFStringMakeConstantString")) {
// We already have a __builtin___CFStringMakeConstantString,
// but builds that use -fno-constant-cfstrings don't go through that.
if (!FD->hasAttr<FormatArgAttr>())
FD->addAttr(FormatArgAttr::CreateImplicit(Context, ParamIdx(1, FD),
FD->getLocation()));
}
}
TypedefDecl *Sema::ParseTypedefDecl(Scope *S, Declarator &D, QualType T,
TypeSourceInfo *TInfo) {
assert(D.getIdentifier() && "Wrong callback for declspec without declarator");
assert(!T.isNull() && "GetTypeForDeclarator() returned null type");
if (!TInfo) {
assert(D.isInvalidType() && "no declarator info for valid type");
TInfo = Context.getTrivialTypeSourceInfo(T);
}
// Scope manipulation handled by caller.
TypedefDecl *NewTD =
TypedefDecl::Create(Context, CurContext, D.getBeginLoc(),
D.getIdentifierLoc(), D.getIdentifier(), TInfo);
// Bail out immediately if we have an invalid declaration.
if (D.isInvalidType()) {
NewTD->setInvalidDecl();
return NewTD;
}
if (D.getDeclSpec().isModulePrivateSpecified()) {
if (CurContext->isFunctionOrMethod())
Diag(NewTD->getLocation(), diag::err_module_private_local)
<< 2 << NewTD
<< SourceRange(D.getDeclSpec().getModulePrivateSpecLoc())
<< FixItHint::CreateRemoval(
D.getDeclSpec().getModulePrivateSpecLoc());
else
NewTD->setModulePrivate();
}
// C++ [dcl.typedef]p8:
// If the typedef declaration defines an unnamed class (or
// enum), the first typedef-name declared by the declaration
// to be that class type (or enum type) is used to denote the
// class type (or enum type) for linkage purposes only.
// We need to check whether the type was declared in the declaration.
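// Example (illustrative): in 'typedef struct { int x; } Point;', the
// unnamed struct uses the typedef-name 'Point' for linkage purposes.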
switch (D.getDeclSpec().getTypeSpecType()) {
case TST_enum:
case TST_struct:
case TST_interface:
case TST_union:
case TST_class: {
TagDecl *tagFromDeclSpec = cast<TagDecl>(D.getDeclSpec().getRepAsDecl());
setTagNameForLinkagePurposes(tagFromDeclSpec, NewTD);
break;
}
default:
break;
}
return NewTD;
}
/// Check that this is a valid underlying type for an enum declaration.
bool Sema::CheckEnumUnderlyingType(TypeSourceInfo *TI) {
SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc();
QualType T = TI->getType();
if (T->isDependentType())
return false;
// This doesn't use 'isIntegralType' despite the error message mentioning
// integral type because isIntegralType would also allow enum types in C.
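// Example (illustrative): 'enum E : long' is accepted and 'enum F : double'
// is diagnosed; in C, an enum type as the base is also rejected even
// though isIntegralType() would accept it.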
if (const BuiltinType *BT = T->getAs<BuiltinType>())
if (BT->isInteger())
return false;
if (T->isBitIntType())
return false;
return Diag(UnderlyingLoc, diag::err_enum_invalid_underlying) << T;
}
/// Check whether this is a valid redeclaration of a previous enumeration.
/// \return true if the redeclaration was invalid.
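/// e.g. 'enum class E : int;' cannot be redeclared as 'enum E : int;'
/// (scoped-ness mismatch) or as 'enum class E : long;' (underlying type
/// mismatch).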
bool Sema::CheckEnumRedeclaration(SourceLocation EnumLoc, bool IsScoped,
QualType EnumUnderlyingTy, bool IsFixed,
const EnumDecl *Prev) {
if (IsScoped != Prev->isScoped()) {
Diag(EnumLoc, diag::err_enum_redeclare_scoped_mismatch)
<< Prev->isScoped();
Diag(Prev->getLocation(), diag::note_previous_declaration);
return true;
}
if (IsFixed && Prev->isFixed()) {
if (!EnumUnderlyingTy->isDependentType() &&
!Prev->getIntegerType()->isDependentType() &&
!Context.hasSameUnqualifiedType(EnumUnderlyingTy,
Prev->getIntegerType())) {
// TODO: Highlight the underlying type of the redeclaration.
Diag(EnumLoc, diag::err_enum_redeclare_type_mismatch)
<< EnumUnderlyingTy << Prev->getIntegerType();
Diag(Prev->getLocation(), diag::note_previous_declaration)
<< Prev->getIntegerTypeRange();
return true;
}
} else if (IsFixed != Prev->isFixed()) {
Diag(EnumLoc, diag::err_enum_redeclare_fixed_mismatch)
<< Prev->isFixed();
Diag(Prev->getLocation(), diag::note_previous_declaration);
return true;
}
return false;
}
/// Get diagnostic %select index for tag kind for
/// redeclaration diagnostic message.
/// WARNING: Indexes apply to particular diagnostics only!
///
/// \returns diagnostic %select index.
static unsigned getRedeclDiagFromTagKind(TagTypeKind Tag) {
switch (Tag) {
case TTK_Struct: return 0;
case TTK_Interface: return 1;
case TTK_Class: return 2;
default: llvm_unreachable("Invalid tag kind for redecl diagnostic!");
}
}
/// Determine if tag kind is a class-key compatible with
/// class for redeclaration (class, struct, or __interface).
///
/// \returns true iff the tag kind is compatible.
static bool isClassCompatTagKind(TagTypeKind Tag)
{
return Tag == TTK_Struct || Tag == TTK_Class || Tag == TTK_Interface;
}
Sema::NonTagKind Sema::getNonTagTypeDeclKind(const Decl *PrevDecl,
TagTypeKind TTK) {
if (isa<TypedefDecl>(PrevDecl))
return NTK_Typedef;
else if (isa<TypeAliasDecl>(PrevDecl))
return NTK_TypeAlias;
else if (isa<ClassTemplateDecl>(PrevDecl))
return NTK_Template;
else if (isa<TypeAliasTemplateDecl>(PrevDecl))
return NTK_TypeAliasTemplate;
else if (isa<TemplateTemplateParmDecl>(PrevDecl))
return NTK_TemplateTemplateArgument;
switch (TTK) {
case TTK_Struct:
case TTK_Interface:
case TTK_Class:
return getLangOpts().CPlusPlus ? NTK_NonClass : NTK_NonStruct;
case TTK_Union:
return NTK_NonUnion;
case TTK_Enum:
return NTK_NonEnum;
}
llvm_unreachable("invalid TTK");
}
/// Determine whether a tag with a given kind is acceptable
/// as a redeclaration of the given tag declaration.
///
/// \returns true if the new tag kind is acceptable, false otherwise.
bool Sema::isAcceptableTagRedeclaration(const TagDecl *Previous,
TagTypeKind NewTag, bool isDefinition,
SourceLocation NewTagLoc,
const IdentifierInfo *Name) {
// C++ [dcl.type.elab]p3:
// The class-key or enum keyword present in the
// elaborated-type-specifier shall agree in kind with the
// declaration to which the name in the elaborated-type-specifier
// refers. This rule also applies to the form of
// elaborated-type-specifier that declares a class-name or
// friend class since it can be construed as referring to the
// definition of the class. Thus, in any
// elaborated-type-specifier, the enum keyword shall be used to
// refer to an enumeration (7.2), the union class-key shall be
// used to refer to a union (clause 9), and either the class or
// struct class-key shall be used to refer to a class (clause 9)
// declared using the class or struct class-key.
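// Example (illustrative): a class declared 'class X;' may later be
// referred to as 'struct X' (subject to -Wmismatched-tags), but
// 'union X' or 'enum X' is an error.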
TagTypeKind OldTag = Previous->getTagKind();
if (OldTag != NewTag &&
!(isClassCompatTagKind(OldTag) && isClassCompatTagKind(NewTag)))
return false;
// Tags are compatible, but we might still want to warn on mismatched tags.
// Non-class tags can't be mismatched at this point.
if (!isClassCompatTagKind(NewTag))
return true;
// Declarations for which -Wmismatched-tags is disabled are entirely ignored
// by our warning analysis. We don't want to warn about mismatches with (e.g.)
// declarations in system headers that are designed to be specialized, but if
// a user asks us to warn, we should warn if their code contains mismatched
// declarations.
auto IsIgnoredLoc = [&](SourceLocation Loc) {
return getDiagnostics().isIgnored(diag::warn_struct_class_tag_mismatch,
Loc);
};
if (IsIgnoredLoc(NewTagLoc))
return true;
auto IsIgnored = [&](const TagDecl *Tag) {
return IsIgnoredLoc(Tag->getLocation());
};
while (IsIgnored(Previous)) {
Previous = Previous->getPreviousDecl();
if (!Previous)
return true;
OldTag = Previous->getTagKind();
}
bool isTemplate = false;
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(Previous))
isTemplate = Record->getDescribedClassTemplate();
if (inTemplateInstantiation()) {
if (OldTag != NewTag) {
// In a template instantiation, do not offer fix-its for tag mismatches
// since they usually mess up the template instead of fixing the problem.
Diag(NewTagLoc, diag::warn_struct_class_tag_mismatch)
<< getRedeclDiagFromTagKind(NewTag) << isTemplate << Name
<< getRedeclDiagFromTagKind(OldTag);
// FIXME: Note previous location?
}
return true;
}
if (isDefinition) {
// On definitions, check all previous tags and issue a fix-it for each
// one that doesn't match the current tag.
if (Previous->getDefinition()) {
// Don't suggest fix-its for redefinitions.
return true;
}
bool previousMismatch = false;
for (const TagDecl *I : Previous->redecls()) {
if (I->getTagKind() != NewTag) {
// Ignore previous declarations for which the warning was disabled.
if (IsIgnored(I))
continue;
if (!previousMismatch) {
previousMismatch = true;
Diag(NewTagLoc, diag::warn_struct_class_previous_tag_mismatch)
<< getRedeclDiagFromTagKind(NewTag) << isTemplate << Name
<< getRedeclDiagFromTagKind(I->getTagKind());
}
Diag(I->getInnerLocStart(), diag::note_struct_class_suggestion)
<< getRedeclDiagFromTagKind(NewTag)
<< FixItHint::CreateReplacement(I->getInnerLocStart(),
TypeWithKeyword::getTagTypeKindName(NewTag));
}
}
return true;
}
// Identify the prevailing tag kind: this is the kind of the definition (if
// there is a non-ignored definition), or otherwise the kind of the prior
// (non-ignored) declaration.
const TagDecl *PrevDef = Previous->getDefinition();
if (PrevDef && IsIgnored(PrevDef))
PrevDef = nullptr;
const TagDecl *Redecl = PrevDef ? PrevDef : Previous;
if (Redecl->getTagKind() != NewTag) {
Diag(NewTagLoc, diag::warn_struct_class_tag_mismatch)
<< getRedeclDiagFromTagKind(NewTag) << isTemplate << Name
<< getRedeclDiagFromTagKind(OldTag);
Diag(Redecl->getLocation(), diag::note_previous_use);
// If there is a previous definition, suggest a fix-it.
if (PrevDef) {
Diag(NewTagLoc, diag::note_struct_class_suggestion)
<< getRedeclDiagFromTagKind(Redecl->getTagKind())
<< FixItHint::CreateReplacement(SourceRange(NewTagLoc),
TypeWithKeyword::getTagTypeKindName(Redecl->getTagKind()));
}
}
return true;
}
/// Add a minimal nested name specifier fixit hint to allow lookup of a tag name
/// from an outer enclosing namespace or file scope inside a friend declaration.
/// This should provide the commented out code in the following snippet:
/// namespace N {
/// struct X;
/// namespace M {
/// struct Y { friend struct /*N::*/ X; };
/// }
/// }
static FixItHint createFriendTagNNSFixIt(Sema &SemaRef, NamedDecl *ND, Scope *S,
SourceLocation NameLoc) {
// While the decl is in a namespace, do repeated lookup of that name and see
// if we get the same namespace back. If we do not, continue until
// translation unit scope, at which point we have a fully qualified NNS.
SmallVector<IdentifierInfo *, 4> Namespaces;
DeclContext *DC = ND->getDeclContext()->getRedeclContext();
for (; !DC->isTranslationUnit(); DC = DC->getParent()) {
// This tag should be declared in a namespace, which can only be enclosed by
// other namespaces. Bail if there's an anonymous namespace in the chain.
NamespaceDecl *Namespace = dyn_cast<NamespaceDecl>(DC);
if (!Namespace || Namespace->isAnonymousNamespace())
return FixItHint();
IdentifierInfo *II = Namespace->getIdentifier();
Namespaces.push_back(II);
NamedDecl *Lookup = SemaRef.LookupSingleName(
S, II, NameLoc, Sema::LookupNestedNameSpecifierName);
if (Lookup == Namespace)
break;
}
// Once we have all the namespaces, reverse them to go outermost first, and
// build an NNS.
SmallString<64> Insertion;
llvm::raw_svector_ostream OS(Insertion);
if (DC->isTranslationUnit())
OS << "::";
std::reverse(Namespaces.begin(), Namespaces.end());
for (auto *II : Namespaces)
OS << II->getName() << "::";
return FixItHint::CreateInsertion(NameLoc, Insertion);
}
/// Determine whether a tag originally declared in context \p OldDC can
/// be redeclared with an unqualified name in \p NewDC (assuming name lookup
/// found a declaration in \p OldDC as a previous decl, perhaps through a
/// using-declaration).
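/// e.g. in MSVC mode, a tag first declared in namespace N may be
/// redeclared unqualified from the nested namespace N::M, since one
/// context encloses the other.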
static bool isAcceptableTagRedeclContext(Sema &S, DeclContext *OldDC,
DeclContext *NewDC) {
OldDC = OldDC->getRedeclContext();
NewDC = NewDC->getRedeclContext();
if (OldDC->Equals(NewDC))
return true;
// In MSVC mode, we allow a redeclaration if the contexts are related (either
// encloses the other).
if (S.getLangOpts().MSVCCompat &&
(OldDC->Encloses(NewDC) || NewDC->Encloses(OldDC)))
return true;
return false;
}
/// This is invoked when we see 'struct foo' or 'struct {'. In the
/// former case, Name will be non-null. In the latter case, Name will be null.
/// TagSpec indicates what kind of tag this is. TUK indicates whether this is a
/// reference/declaration/definition of a tag.
///
/// \param IsTypeSpecifier \c true if this is a type-specifier (or
/// trailing-type-specifier) other than one in an alias-declaration.
///
/// \param SkipBody If non-null, will be set to indicate if the caller should
/// skip the definition of this tag and treat it as if it were a declaration.
DeclResult
Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc,
CXXScopeSpec &SS, IdentifierInfo *Name, SourceLocation NameLoc,
const ParsedAttributesView &Attrs, AccessSpecifier AS,
SourceLocation ModulePrivateLoc,
MultiTemplateParamsArg TemplateParameterLists, bool &OwnedDecl,
bool &IsDependent, SourceLocation ScopedEnumKWLoc,
bool ScopedEnumUsesClassTag, TypeResult UnderlyingType,
bool IsTypeSpecifier, bool IsTemplateParamOrArg,
OffsetOfKind OOK, SkipBodyInfo *SkipBody) {
// If this is not a definition, it must have a name.
IdentifierInfo *OrigName = Name;
assert((Name != nullptr || TUK == TUK_Definition) &&
"Nameless record must be a definition!");
assert(TemplateParameterLists.size() == 0 || TUK != TUK_Reference);
OwnedDecl = false;
TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForTypeSpec(TagSpec);
bool ScopedEnum = ScopedEnumKWLoc.isValid();
// FIXME: Check member specializations more carefully.
bool isMemberSpecialization = false;
bool Invalid = false;
// We only need to do this matching if we have template parameters
// or a scope specifier, which also conveniently avoids this work
// for non-C++ cases.
if (TemplateParameterLists.size() > 0 ||
(SS.isNotEmpty() && TUK != TUK_Reference)) {
if (TemplateParameterList *TemplateParams =
MatchTemplateParametersToScopeSpecifier(
KWLoc, NameLoc, SS, nullptr, TemplateParameterLists,
TUK == TUK_Friend, isMemberSpecialization, Invalid)) {
if (Kind == TTK_Enum) {
Diag(KWLoc, diag::err_enum_template);
return true;
}
if (TemplateParams->size() > 0) {
// This is a declaration or definition of a class template (which may
// be a member of another template).
if (Invalid)
return true;
OwnedDecl = false;
DeclResult Result = CheckClassTemplate(
S, TagSpec, TUK, KWLoc, SS, Name, NameLoc, Attrs, TemplateParams,
AS, ModulePrivateLoc,
/*FriendLoc*/ SourceLocation(), TemplateParameterLists.size() - 1,
TemplateParameterLists.data(), SkipBody);
return Result.get();
} else {
// The "template<>" header is extraneous.
Diag(TemplateParams->getTemplateLoc(), diag::err_template_tag_noparams)
<< TypeWithKeyword::getTagTypeKindName(Kind) << Name;
isMemberSpecialization = true;
}
}
if (!TemplateParameterLists.empty() && isMemberSpecialization &&
CheckTemplateDeclScope(S, TemplateParameterLists.back()))
return true;
}
// Figure out the underlying type if this is an enum declaration. We need to do
// this early, because it's needed to detect if this is an incompatible
// redeclaration.
llvm::PointerUnion<const Type*, TypeSourceInfo*> EnumUnderlying;
bool IsFixed = !UnderlyingType.isUnset() || ScopedEnum;
if (Kind == TTK_Enum) {
if (UnderlyingType.isInvalid() || (!UnderlyingType.get() && ScopedEnum)) {
// No underlying type explicitly specified, or we failed to parse the
// type, default to int.
EnumUnderlying = Context.IntTy.getTypePtr();
} else if (UnderlyingType.get()) {
// C++0x 7.2p2: The type-specifier-seq of an enum-base shall name an
// integral type; any cv-qualification is ignored.
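// Example (illustrative): 'enum E : const unsigned int' has underlying
// type 'unsigned int'; the cv-qualification is dropped.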
TypeSourceInfo *TI = nullptr;
GetTypeFromParser(UnderlyingType.get(), &TI);
EnumUnderlying = TI;
if (CheckEnumUnderlyingType(TI))
// Recover by falling back to int.
EnumUnderlying = Context.IntTy.getTypePtr();
if (DiagnoseUnexpandedParameterPack(TI->getTypeLoc().getBeginLoc(), TI,
UPPC_FixedUnderlyingType))
EnumUnderlying = Context.IntTy.getTypePtr();
} else if (Context.getTargetInfo().getTriple().isWindowsMSVCEnvironment()) {
// For MSVC ABI compatibility, unfixed enums must use an underlying type
// of 'int'. However, if this is an unfixed forward declaration, don't set
// the underlying type unless the user enables -fms-compatibility. This
// makes unfixed forward declared enums incomplete and is more conforming.
if (TUK == TUK_Definition || getLangOpts().MSVCCompat)
EnumUnderlying = Context.IntTy.getTypePtr();
}
}
DeclContext *SearchDC = CurContext;
DeclContext *DC = CurContext;
bool isStdBadAlloc = false;
bool isStdAlignValT = false;
RedeclarationKind Redecl = forRedeclarationInCurContext();
if (TUK == TUK_Friend || TUK == TUK_Reference)
Redecl = NotForRedeclaration;
/// Create a new tag decl in C/ObjC. Since the ODR-like semantics
/// implemented for ObjC/C rely on structural equivalence checking, the
/// returned decl here is passed back to the parser, allowing the tag body
/// to be parsed.
auto createTagFromNewDecl = [&]() -> TagDecl * {
assert(!getLangOpts().CPlusPlus && "not meant for C++ usage");
// If there is an identifier, use the location of the identifier as the
// location of the decl, otherwise use the location of the struct/union
// keyword.
SourceLocation Loc = NameLoc.isValid() ? NameLoc : KWLoc;
TagDecl *New = nullptr;
if (Kind == TTK_Enum) {
New = EnumDecl::Create(Context, SearchDC, KWLoc, Loc, Name, nullptr,
ScopedEnum, ScopedEnumUsesClassTag, IsFixed);
// If this is an undefined enum, bail.
if (TUK != TUK_Definition && !Invalid)
return nullptr;
if (EnumUnderlying) {
EnumDecl *ED = cast<EnumDecl>(New);
if (TypeSourceInfo *TI = EnumUnderlying.dyn_cast<TypeSourceInfo *>())
ED->setIntegerTypeSourceInfo(TI);
else
ED->setIntegerType(QualType(EnumUnderlying.get<const Type *>(), 0));
QualType EnumTy = ED->getIntegerType();
ED->setPromotionType(Context.isPromotableIntegerType(EnumTy)
? Context.getPromotedIntegerType(EnumTy)
: EnumTy);
}
} else { // struct/union
New = RecordDecl::Create(Context, Kind, SearchDC, KWLoc, Loc, Name,
nullptr);
}
if (RecordDecl *RD = dyn_cast<RecordDecl>(New)) {
// Add alignment attributes if necessary; these attributes are checked
// when the ASTContext lays out the structure.
//
// It is important for implementing the correct semantics that this
// happen here (in ActOnTag). The #pragma pack stack is
// maintained as a result of parser callbacks which can occur at
// many points during the parsing of a struct declaration (because
// the #pragma tokens are effectively skipped over during the
// parsing of the struct).
if (TUK == TUK_Definition && (!SkipBody || !SkipBody->ShouldSkip)) {
AddAlignmentAttributesForRecord(RD);
AddMsStructLayoutForRecord(RD);
}
}
New->setLexicalDeclContext(CurContext);
return New;
};
LookupResult Previous(*this, Name, NameLoc, LookupTagName, Redecl);
if (Name && SS.isNotEmpty()) {
// We have a nested-name tag ('struct foo::bar').
// Check for invalid 'foo::'.
if (SS.isInvalid()) {
Name = nullptr;
goto CreateNewDecl;
}
// If this is a friend or a reference to a class in a dependent
// context, don't try to make a decl for it.
if (TUK == TUK_Friend || TUK == TUK_Reference) {
DC = computeDeclContext(SS, false);
if (!DC) {
IsDependent = true;
return true;
}
} else {
DC = computeDeclContext(SS, true);
if (!DC) {
Diag(SS.getRange().getBegin(), diag::err_dependent_nested_name_spec)
<< SS.getRange();
return true;
}
}
if (RequireCompleteDeclContext(SS, DC))
return true;
SearchDC = DC;
// Look-up name inside 'foo::'.
LookupQualifiedName(Previous, DC);
if (Previous.isAmbiguous())
return true;
if (Previous.empty()) {
// Name lookup did not find anything. However, if the
// nested-name-specifier refers to the current instantiation,
// and that current instantiation has any dependent base
// classes, we might find something at instantiation time: treat
// this as a dependent elaborated-type-specifier.
// But this only makes sense for reference-like lookups.
if (Previous.wasNotFoundInCurrentInstantiation() &&
(TUK == TUK_Reference || TUK == TUK_Friend)) {
IsDependent = true;
return true;
}
// A tag 'foo::bar' must already exist.
Diag(NameLoc, diag::err_not_tag_in_scope)
<< Kind << Name << DC << SS.getRange();
Name = nullptr;
Invalid = true;
goto CreateNewDecl;
}
} else if (Name) {
// C++14 [class.mem]p14:
// If T is the name of a class, then each of the following shall have a
// name different from T:
// -- every member of class T that is itself a type
if (TUK != TUK_Reference && TUK != TUK_Friend &&
DiagnoseClassNameShadow(SearchDC, DeclarationNameInfo(Name, NameLoc)))
return true;
// If this is a named struct, check to see if there was a previous forward
// declaration or definition.
// FIXME: We're looking into outer scopes here, even when we
// shouldn't be. Doing so can result in ambiguities that we
// shouldn't be diagnosing.
LookupName(Previous, S);
// When declaring or defining a tag, ignore ambiguities introduced
// by types using'ed into this scope.
if (Previous.isAmbiguous() &&
(TUK == TUK_Definition || TUK == TUK_Declaration)) {
LookupResult::Filter F = Previous.makeFilter();
while (F.hasNext()) {
NamedDecl *ND = F.next();
if (!ND->getDeclContext()->getRedeclContext()->Equals(
SearchDC->getRedeclContext()))
F.erase();
}
F.done();
}
// C++11 [namespace.memdef]p3:
// If the name in a friend declaration is neither qualified nor
// a template-id and the declaration is a function or an
// elaborated-type-specifier, the lookup to determine whether
// the entity has been previously declared shall not consider
// any scopes outside the innermost enclosing namespace.
//
// MSVC doesn't implement the above rule for types, so a friend tag
// declaration may be a redeclaration of a type declared in an enclosing
// scope. They do implement this rule for friend functions.
//
// Does it matter that this should be by scope instead of by
// semantic context?
if (!Previous.empty() && TUK == TUK_Friend) {
DeclContext *EnclosingNS = SearchDC->getEnclosingNamespaceContext();
LookupResult::Filter F = Previous.makeFilter();
bool FriendSawTagOutsideEnclosingNamespace = false;
while (F.hasNext()) {
NamedDecl *ND = F.next();
DeclContext *DC = ND->getDeclContext()->getRedeclContext();
if (DC->isFileContext() &&
!EnclosingNS->Encloses(ND->getDeclContext())) {
if (getLangOpts().MSVCCompat)
FriendSawTagOutsideEnclosingNamespace = true;
else
F.erase();
}
}
F.done();
// Diagnose this MSVC extension in the easy case where lookup would have
// unambiguously found something outside the enclosing namespace.
if (Previous.isSingleResult() && FriendSawTagOutsideEnclosingNamespace) {
NamedDecl *ND = Previous.getFoundDecl();
Diag(NameLoc, diag::ext_friend_tag_redecl_outside_namespace)
<< createFriendTagNNSFixIt(*this, ND, S, NameLoc);
}
}
// Note: there used to be some attempt at recovery here.
if (Previous.isAmbiguous())
return true;
if (!getLangOpts().CPlusPlus && TUK != TUK_Reference) {
// FIXME: This makes sure that we ignore the contexts associated
// with C structs, unions, and enums when looking for a matching
// tag declaration or definition. See the similar lookup tweak
// in Sema::LookupName; is there a better way to deal with this?
while (isa<RecordDecl, EnumDecl, ObjCContainerDecl>(SearchDC))
SearchDC = SearchDC->getParent();
} else if (getLangOpts().CPlusPlus) {
// Inside ObjCContainer want to keep it as a lexical decl context but go
// past it (most often to TranslationUnit) to find the semantic decl
// context.
while (isa<ObjCContainerDecl>(SearchDC))
SearchDC = SearchDC->getParent();
}
} else if (getLangOpts().CPlusPlus) {
// Don't use ObjCContainerDecl as the semantic decl context for anonymous
// TagDecl the same way as we skip it for named TagDecl.
while (isa<ObjCContainerDecl>(SearchDC))
SearchDC = SearchDC->getParent();
}
if (Previous.isSingleResult() &&
Previous.getFoundDecl()->isTemplateParameter()) {
// Maybe we will complain about the shadowed template parameter.
DiagnoseTemplateParameterShadow(NameLoc, Previous.getFoundDecl());
// Just pretend that we didn't see the previous declaration.
Previous.clear();
}
if (getLangOpts().CPlusPlus && Name && DC && StdNamespace &&
DC->Equals(getStdNamespace())) {
if (Name->isStr("bad_alloc")) {
// This is a declaration of or a reference to "std::bad_alloc".
isStdBadAlloc = true;
// If std::bad_alloc has been implicitly declared (but made invisible to
// name lookup), fill in this implicit declaration as the previous
// declaration, so that the declarations get chained appropriately.
if (Previous.empty() && StdBadAlloc)
Previous.addDecl(getStdBadAlloc());
} else if (Name->isStr("align_val_t")) {
isStdAlignValT = true;
if (Previous.empty() && StdAlignValT)
Previous.addDecl(getStdAlignValT());
}
}
// If we didn't find a previous declaration, and this is a reference
// (or friend reference), move to the correct scope. In C++, we
// also need to do a redeclaration lookup there, just in case
// there's a shadow friend decl.
if (Name && Previous.empty() &&
(TUK == TUK_Reference || TUK == TUK_Friend || IsTemplateParamOrArg)) {
if (Invalid) goto CreateNewDecl;
assert(SS.isEmpty());
if (TUK == TUK_Reference || IsTemplateParamOrArg) {
// C++ [basic.scope.pdecl]p5:
// -- for an elaborated-type-specifier of the form
//
// class-key identifier
//
// if the elaborated-type-specifier is used in the
// decl-specifier-seq or parameter-declaration-clause of a
// function defined in namespace scope, the identifier is
// declared as a class-name in the namespace that contains
// the declaration; otherwise, except as a friend
// declaration, the identifier is declared in the smallest
// non-class, non-function-prototype scope that contains the
// declaration.
//
// C99 6.7.2.3p8 has a similar (but not identical!) provision for
// C structs and unions.
//
// It is an error in C++ to declare (rather than define) an enum
// type, including via an elaborated type specifier. We'll
// diagnose that later; for now, declare the enum in the same
// scope as we would have picked for any other tag type.
//
// GNU C also supports this behavior as part of its incomplete
// enum types extension, while GNU C++ does not.
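//
// Example (illustrative): in C++, given the namespace-scope definition
//   void f(struct S *p) { }
// 'S' is declared in the enclosing namespace, not in the function's
// parameter scope.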
//
// Find the context where we'll be declaring the tag.
// FIXME: We would like to maintain the current DeclContext as the
// lexical context,
SearchDC = getTagInjectionContext(SearchDC);
// Find the scope where we'll be declaring the tag.
S = getTagInjectionScope(S, getLangOpts());
} else {
assert(TUK == TUK_Friend);
// C++ [namespace.memdef]p3:
// If a friend declaration in a non-local class first declares a
// class or function, the friend class or function is a member of
// the innermost enclosing namespace.
SearchDC = SearchDC->getEnclosingNamespaceContext();
}
// In C++, we need to do a redeclaration lookup to properly
// diagnose some problems.
// FIXME: redeclaration lookup is also used (with and without C++) to find a
// hidden declaration so that we don't get ambiguity errors when using a
// type declared by an elaborated-type-specifier. In C that is not correct
// and we should instead merge compatible types found by lookup.
if (getLangOpts().CPlusPlus) {
// FIXME: This can perform qualified lookups into function contexts,
// which are meaningless.
Previous.setRedeclarationKind(forRedeclarationInCurContext());
LookupQualifiedName(Previous, SearchDC);
} else {
Previous.setRedeclarationKind(forRedeclarationInCurContext());
LookupName(Previous, S);
}
}
// If we have a known previous declaration to use, then use it.
if (Previous.empty() && SkipBody && SkipBody->Previous)
Previous.addDecl(SkipBody->Previous);
if (!Previous.empty()) {
NamedDecl *PrevDecl = Previous.getFoundDecl();
NamedDecl *DirectPrevDecl = Previous.getRepresentativeDecl();
// It's okay to have a tag decl in the same scope as a typedef
// which hides a tag decl in the same scope. Finding this
// with a redeclaration lookup can only actually happen in C++.
//
// This is also okay for elaborated-type-specifiers, which is
// technically forbidden by the current standard but which is
// okay according to the likely resolution of an open issue;
// see http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#407
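//
// e.g. (C++):
//   struct S {};
//   typedef struct S S;  // the typedef hides the tag in this scope;
//   struct S *p;         // an elaborated type still finds the tag.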
if (getLangOpts().CPlusPlus) {
if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(PrevDecl)) {
if (const TagType *TT = TD->getUnderlyingType()->getAs<TagType>()) {
TagDecl *Tag = TT->getDecl();
if (Tag->getDeclName() == Name &&
Tag->getDeclContext()->getRedeclContext()
->Equals(TD->getDeclContext()->getRedeclContext())) {
PrevDecl = Tag;
Previous.clear();
Previous.addDecl(Tag);
Previous.resolveKind();
}
}
}
}
// If this is a redeclaration of a using shadow declaration, it must
// declare a tag in the same context. In MSVC mode, we allow a
// redefinition if either context is within the other.
if (auto *Shadow = dyn_cast<UsingShadowDecl>(DirectPrevDecl)) {
auto *OldTag = dyn_cast<TagDecl>(PrevDecl);
if (SS.isEmpty() && TUK != TUK_Reference && TUK != TUK_Friend &&
isDeclInScope(Shadow, SearchDC, S, isMemberSpecialization) &&
!(OldTag && isAcceptableTagRedeclContext(
*this, OldTag->getDeclContext(), SearchDC))) {
Diag(KWLoc, diag::err_using_decl_conflict_reverse);
Diag(Shadow->getTargetDecl()->getLocation(),
diag::note_using_decl_target);
Diag(Shadow->getIntroducer()->getLocation(), diag::note_using_decl)
<< 0;
// Recover by ignoring the old declaration.
Previous.clear();
goto CreateNewDecl;
}
}
if (TagDecl *PrevTagDecl = dyn_cast<TagDecl>(PrevDecl)) {
// If this is a use of a previous tag, or if the tag is already declared
// in the same scope (so that the definition/declaration completes or
// re-mentions the tag), reuse the decl.
if (TUK == TUK_Reference || TUK == TUK_Friend ||
isDeclInScope(DirectPrevDecl, SearchDC, S,
SS.isNotEmpty() || isMemberSpecialization)) {
// Make sure that this wasn't declared as an enum and now used as a
// struct or something similar.
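// e.g.:
//   enum E { e };
//   struct E *p;  // error: tag kind does not match the previous use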
if (!isAcceptableTagRedeclaration(PrevTagDecl, Kind,
TUK == TUK_Definition, KWLoc,
Name)) {
bool SafeToContinue
= (PrevTagDecl->getTagKind() != TTK_Enum &&
Kind != TTK_Enum);
if (SafeToContinue)
Diag(KWLoc, diag::err_use_with_wrong_tag)
<< Name
<< FixItHint::CreateReplacement(SourceRange(KWLoc),
PrevTagDecl->getKindName());
else
Diag(KWLoc, diag::err_use_with_wrong_tag) << Name;
Diag(PrevTagDecl->getLocation(), diag::note_previous_use);
if (SafeToContinue)
Kind = PrevTagDecl->getTagKind();
else {
// Recover by making this an anonymous redefinition.
Name = nullptr;
Previous.clear();
Invalid = true;
}
}
if (Kind == TTK_Enum && PrevTagDecl->getTagKind() == TTK_Enum) {
const EnumDecl *PrevEnum = cast<EnumDecl>(PrevTagDecl);
if (TUK == TUK_Reference || TUK == TUK_Friend)
return PrevTagDecl;
QualType EnumUnderlyingTy;
if (TypeSourceInfo *TI = EnumUnderlying.dyn_cast<TypeSourceInfo*>())
EnumUnderlyingTy = TI->getType().getUnqualifiedType();
else if (const Type *T = EnumUnderlying.dyn_cast<const Type*>())
EnumUnderlyingTy = QualType(T, 0);
// All conflicts with previous declarations are recovered by
// returning the previous declaration, unless this is a definition,
// in which case we want the caller to bail out.
if (CheckEnumRedeclaration(NameLoc.isValid() ? NameLoc : KWLoc,
ScopedEnum, EnumUnderlyingTy,
IsFixed, PrevEnum))
return TUK == TUK_Declaration ? PrevTagDecl : nullptr;
}
// C++11 [class.mem]p1:
// A member shall not be declared twice in the member-specification,
// except that a nested class or member class template can be declared
// and then later defined.
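// e.g.:
//   struct S { struct T; struct T; };  // the second declaration is
//                                      // diagnosed as an extension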
if (TUK == TUK_Declaration && PrevDecl->isCXXClassMember() &&
S->isDeclScope(PrevDecl)) {
Diag(NameLoc, diag::ext_member_redeclared);
Diag(PrevTagDecl->getLocation(), diag::note_previous_declaration);
}
if (!Invalid) {
// If this is a use, just return the declaration we found, unless
// we have attributes.
if (TUK == TUK_Reference || TUK == TUK_Friend) {
if (!Attrs.empty()) {
// FIXME: Diagnose these attributes. For now, we create a new
// declaration to hold them.
} else if (TUK == TUK_Reference &&
(PrevTagDecl->getFriendObjectKind() ==
Decl::FOK_Undeclared ||
PrevDecl->getOwningModule() != getCurrentModule()) &&
SS.isEmpty()) {
// This declaration is a reference to an existing entity, but
// has different visibility from that entity: it either makes
// a friend visible or it makes a type visible in a new module.
// In either case, create a new declaration. We only do this if
// the declaration would have meant the same thing if no prior
// declaration were found, that is, if it was found in the same
// scope where we would have injected a declaration.
if (!getTagInjectionContext(CurContext)->getRedeclContext()
->Equals(PrevDecl->getDeclContext()->getRedeclContext()))
return PrevTagDecl;
// This is in the injected scope, create a new declaration in
// that scope.
S = getTagInjectionScope(S, getLangOpts());
} else {
return PrevTagDecl;
}
}
// Diagnose attempts to redefine a tag.
if (TUK == TUK_Definition) {
if (NamedDecl *Def = PrevTagDecl->getDefinition()) {
// If we're defining a specialization and the previous definition
// is from an implicit instantiation, don't emit an error
// here; we'll catch this in the general case below.
bool IsExplicitSpecializationAfterInstantiation = false;
if (isMemberSpecialization) {
if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Def))
IsExplicitSpecializationAfterInstantiation =
RD->getTemplateSpecializationKind() !=
TSK_ExplicitSpecialization;
else if (EnumDecl *ED = dyn_cast<EnumDecl>(Def))
IsExplicitSpecializationAfterInstantiation =
ED->getTemplateSpecializationKind() !=
TSK_ExplicitSpecialization;
}
// Note that Clang allows ODR-like semantics for ObjC/C, i.e., it does
// not keep more than one definition around (they are merged). However,
// ensure the decl passes the structural compatibility check in
// C11 6.2.7/1 (or 6.1.2.6/1 in C89).
NamedDecl *Hidden = nullptr;
if (SkipBody && !hasVisibleDefinition(Def, &Hidden)) {
// There is a definition of this tag, but it is not visible. We
// explicitly make use of C++'s one definition rule here, and
// assume that this definition is identical to the hidden one
// we already have. Make the existing definition visible and
// use it in place of this one.
if (!getLangOpts().CPlusPlus) {
// Postpone making the old definition visible until after we
// complete parsing the new one and do the structural
// comparison.
SkipBody->CheckSameAsPrevious = true;
SkipBody->New = createTagFromNewDecl();
SkipBody->Previous = Def;
return Def;
} else {
SkipBody->ShouldSkip = true;
SkipBody->Previous = Def;
makeMergedDefinitionVisible(Hidden);
// Carry on and handle it like a normal definition. We'll
// skip starting the definition later.
}
} else if (!IsExplicitSpecializationAfterInstantiation) {
// A redeclaration in function prototype scope in C isn't
// visible elsewhere, so merely issue a warning.
if (!getLangOpts().CPlusPlus && S->containedInPrototypeScope())
Diag(NameLoc, diag::warn_redefinition_in_param_list) << Name;
else
Diag(NameLoc, diag::err_redefinition) << Name;
notePreviousDefinition(Def,
NameLoc.isValid() ? NameLoc : KWLoc);
// If this is a redefinition, recover by making this
// struct be anonymous, which will make any later
// references get the previous definition.
Name = nullptr;
Previous.clear();
Invalid = true;
}
} else {
// If the type is currently being defined, complain
// about a nested redefinition.
auto *TD = Context.getTagDeclType(PrevTagDecl)->getAsTagDecl();
if (TD->isBeingDefined()) {
Diag(NameLoc, diag::err_nested_redefinition) << Name;
Diag(PrevTagDecl->getLocation(),
diag::note_previous_definition);
Name = nullptr;
Previous.clear();
Invalid = true;
}
}
// Okay, this is definition of a previously declared or referenced
// tag. We're going to create a new Decl for it.
}
// Okay, we're going to make a redeclaration. If this is some kind
// of reference, make sure we build the redeclaration in the same DC
// as the original, and ignore the current access specifier.
if (TUK == TUK_Friend || TUK == TUK_Reference) {
SearchDC = PrevTagDecl->getDeclContext();
AS = AS_none;
}
}
// If we get here we have (another) forward declaration or we
// have a definition. Just create a new decl.
} else {
// If we get here, this is a definition of a new tag type in a nested
// scope, e.g. "struct foo; void bar() { struct foo; }", just create a
// new decl/type. We set PrevDecl to NULL so that the entities
// have distinct types.
Previous.clear();
}
// If we get here, we're going to create a new Decl. If PrevDecl
// is non-NULL, it's a definition of the tag declared by
// PrevDecl. If it's NULL, we have a new definition.
// Otherwise, PrevDecl is not a tag, but was found with tag
// lookup. This is only actually possible in C++, where a few
// things like templates still live in the tag namespace.
} else {
// Use a better diagnostic if an elaborated-type-specifier
// found the wrong kind of type on the first
// (non-redeclaration) lookup.
if ((TUK == TUK_Reference || TUK == TUK_Friend) &&
!Previous.isForRedeclaration()) {
NonTagKind NTK = getNonTagTypeDeclKind(PrevDecl, Kind);
Diag(NameLoc, diag::err_tag_reference_non_tag) << PrevDecl << NTK
<< Kind;
Diag(PrevDecl->getLocation(), diag::note_declared_at);
Invalid = true;
// Otherwise, only diagnose if the declaration is in scope.
} else if (!isDeclInScope(DirectPrevDecl, SearchDC, S,
SS.isNotEmpty() || isMemberSpecialization)) {
// do nothing
// Diagnose implicit declarations introduced by elaborated types.
} else if (TUK == TUK_Reference || TUK == TUK_Friend) {
NonTagKind NTK = getNonTagTypeDeclKind(PrevDecl, Kind);
Diag(NameLoc, diag::err_tag_reference_conflict) << NTK;
Diag(PrevDecl->getLocation(), diag::note_previous_decl) << PrevDecl;
Invalid = true;
// Otherwise it's a declaration. Call out a particularly common
// case here.
} else if (TypedefNameDecl *TND = dyn_cast<TypedefNameDecl>(PrevDecl)) {
unsigned Kind = 0;
if (isa<TypeAliasDecl>(PrevDecl)) Kind = 1;
Diag(NameLoc, diag::err_tag_definition_of_typedef)
<< Name << Kind << TND->getUnderlyingType();
Diag(PrevDecl->getLocation(), diag::note_previous_decl) << PrevDecl;
Invalid = true;
// Otherwise, diagnose.
} else {
// The tag name clashes with something else in the target scope,
// issue an error and recover by making this tag be anonymous.
Diag(NameLoc, diag::err_redefinition_different_kind) << Name;
notePreviousDefinition(PrevDecl, NameLoc);
Name = nullptr;
Invalid = true;
}
// The existing declaration isn't relevant to us; we're in a
// new scope, so clear out the previous declaration.
Previous.clear();
}
}
CreateNewDecl:
TagDecl *PrevDecl = nullptr;
if (Previous.isSingleResult())
PrevDecl = cast<TagDecl>(Previous.getFoundDecl());
// If there is an identifier, use the location of the identifier as the
// location of the decl, otherwise use the location of the struct/union
// keyword.
SourceLocation Loc = NameLoc.isValid() ? NameLoc : KWLoc;
// Otherwise, create a new declaration. If there is a previous
// declaration of the same entity, the two will be linked via
// PrevDecl.
TagDecl *New;
if (Kind == TTK_Enum) {
// FIXME: Tag decls should be chained to any simultaneous vardecls, e.g.:
// enum X { A, B, C } D; D should chain to X.
New = EnumDecl::Create(Context, SearchDC, KWLoc, Loc, Name,
cast_or_null<EnumDecl>(PrevDecl), ScopedEnum,
ScopedEnumUsesClassTag, IsFixed);
if (isStdAlignValT && (!StdAlignValT || getStdAlignValT()->isImplicit()))
StdAlignValT = cast<EnumDecl>(New);
// If this is an undefined enum, warn.
if (TUK != TUK_Definition && !Invalid) {
TagDecl *Def;
if (IsFixed && cast<EnumDecl>(New)->isFixed()) {
// C++0x: 7.2p2: opaque-enum-declaration.
// Conflicts are diagnosed above. Do nothing.
}
else if (PrevDecl && (Def = cast<EnumDecl>(PrevDecl)->getDefinition())) {
Diag(Loc, diag::ext_forward_ref_enum_def)
<< New;
Diag(Def->getLocation(), diag::note_previous_definition);
} else {
unsigned DiagID = diag::ext_forward_ref_enum;
if (getLangOpts().MSVCCompat)
DiagID = diag::ext_ms_forward_ref_enum;
else if (getLangOpts().CPlusPlus)
DiagID = diag::err_forward_ref_enum;
Diag(Loc, DiagID);
}
}
if (EnumUnderlying) {
EnumDecl *ED = cast<EnumDecl>(New);
if (TypeSourceInfo *TI = EnumUnderlying.dyn_cast<TypeSourceInfo*>())
ED->setIntegerTypeSourceInfo(TI);
else
ED->setIntegerType(QualType(EnumUnderlying.get<const Type *>(), 0));
QualType EnumTy = ED->getIntegerType();
ED->setPromotionType(Context.isPromotableIntegerType(EnumTy)
? Context.getPromotedIntegerType(EnumTy)
: EnumTy);
assert(ED->isComplete() && "enum with type should be complete");
}
} else {
// struct/union/class
// FIXME: Tag decls should be chained to any simultaneous vardecls, e.g.:
// struct X { int A; } D; D should chain to X.
if (getLangOpts().CPlusPlus) {
// FIXME: Look for a way to use RecordDecl for simple structs.
New = CXXRecordDecl::Create(Context, Kind, SearchDC, KWLoc, Loc, Name,
cast_or_null<CXXRecordDecl>(PrevDecl));
if (isStdBadAlloc && (!StdBadAlloc || getStdBadAlloc()->isImplicit()))
StdBadAlloc = cast<CXXRecordDecl>(New);
} else
New = RecordDecl::Create(Context, Kind, SearchDC, KWLoc, Loc, Name,
cast_or_null<RecordDecl>(PrevDecl));
}
if (OOK != OOK_Outside && TUK == TUK_Definition && !getLangOpts().CPlusPlus)
Diag(New->getLocation(), diag::ext_type_defined_in_offsetof)
<< (OOK == OOK_Macro) << New->getSourceRange();
// C++11 [dcl.type]p3:
// A type-specifier-seq shall not define a class or enumeration [...].
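// e.g.:
//   auto *p = new struct S { int x; };  // error: 'S' cannot be defined
//                                       // in a type specifier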
if (!Invalid && getLangOpts().CPlusPlus &&
(IsTypeSpecifier || IsTemplateParamOrArg) && TUK == TUK_Definition) {
Diag(New->getLocation(), diag::err_type_defined_in_type_specifier)
<< Context.getTagDeclType(New);
Invalid = true;
}
if (!Invalid && getLangOpts().CPlusPlus && TUK == TUK_Definition &&
DC->getDeclKind() == Decl::Enum) {
Diag(New->getLocation(), diag::err_type_defined_in_enum)
<< Context.getTagDeclType(New);
Invalid = true;
}
// Maybe add qualifier info.
if (SS.isNotEmpty()) {
if (SS.isSet()) {
// If this is either a declaration or a definition, check the
// nested-name-specifier against the current context.
if ((TUK == TUK_Definition || TUK == TUK_Declaration) &&
diagnoseQualifiedDeclaration(SS, DC, OrigName, Loc,
isMemberSpecialization))
Invalid = true;
New->setQualifierInfo(SS.getWithLocInContext(Context));
if (TemplateParameterLists.size() > 0) {
New->setTemplateParameterListsInfo(Context, TemplateParameterLists);
}
}
else
Invalid = true;
}
if (RecordDecl *RD = dyn_cast<RecordDecl>(New)) {
// Add alignment attributes if necessary; these attributes are checked when
// the ASTContext lays out the structure.
//
// It is important for implementing the correct semantics that this
// happen here (in ActOnTag). The #pragma pack stack is
// maintained as a result of parser callbacks which can occur at
// many points during the parsing of a struct declaration (because
// the #pragma tokens are effectively skipped over during the
// parsing of the struct).
if (TUK == TUK_Definition && (!SkipBody || !SkipBody->ShouldSkip)) {
AddAlignmentAttributesForRecord(RD);
AddMsStructLayoutForRecord(RD);
}
}
if (ModulePrivateLoc.isValid()) {
if (isMemberSpecialization)
Diag(New->getLocation(), diag::err_module_private_specialization)
<< 2
<< FixItHint::CreateRemoval(ModulePrivateLoc);
// __module_private__ does not apply to local classes. However, we only
// diagnose this as an error when the declaration specifiers are
// freestanding. Here, we just ignore the __module_private__.
else if (!SearchDC->isFunctionOrMethod())
New->setModulePrivate();
}
// If this is a specialization of a member class (of a class template),
// check the specialization.
if (isMemberSpecialization && CheckMemberSpecialization(New, Previous))
Invalid = true;
// If we're declaring or defining a tag in function prototype scope in C,
// note that this type can only be used within the function and add it to
// the list of decls to inject into the function definition scope.
if ((Name || Kind == TTK_Enum) &&
getNonFieldDeclScope(S)->isFunctionPrototypeScope()) {
if (getLangOpts().CPlusPlus) {
// C++ [dcl.fct]p6:
// Types shall not be defined in return or parameter types.
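// e.g.:
//   void f(struct S { int x; } s);  // error: 'S' cannot be defined in
//                                   // a parameter type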
if (TUK == TUK_Definition && !IsTypeSpecifier) {
Diag(Loc, diag::err_type_defined_in_param_type)
<< Name;
Invalid = true;
}
} else if (!PrevDecl) {
Diag(Loc, diag::warn_decl_in_param_list) << Context.getTagDeclType(New);
}
}
if (Invalid)
New->setInvalidDecl();
// Set the lexical context. If the tag has a C++ scope specifier, the
// lexical context will be different from the semantic context.
New->setLexicalDeclContext(CurContext);
// Mark this as a friend decl if applicable.
// In Microsoft mode, a friend declaration also acts as a forward
// declaration so we always pass true to setObjectOfFriendDecl to make
// the tag name visible.
if (TUK == TUK_Friend)
New->setObjectOfFriendDecl(getLangOpts().MSVCCompat);
// Set the access specifier.
if (!Invalid && SearchDC->isRecord())
SetMemberAccessSpecifier(New, PrevDecl, AS);
if (PrevDecl)
CheckRedeclarationInModule(New, PrevDecl);
if (TUK == TUK_Definition && (!SkipBody || !SkipBody->ShouldSkip))
New->startDefinition();
ProcessDeclAttributeList(S, New, Attrs);
AddPragmaAttributes(S, New);
// If this has an identifier, add it to the scope stack.
if (TUK == TUK_Friend) {
// We might be replacing an existing declaration in the lookup tables;
// if so, borrow its access specifier.
if (PrevDecl)
New->setAccess(PrevDecl->getAccess());
DeclContext *DC = New->getDeclContext()->getRedeclContext();
DC->makeDeclVisibleInContext(New);
if (Name) // can be null along some error paths
if (Scope *EnclosingScope = getScopeForDeclContext(S, DC))
PushOnScopeChains(New, EnclosingScope, /* AddToContext = */ false);
} else if (Name) {
S = getNonFieldDeclScope(S);
PushOnScopeChains(New, S, true);
} else {
CurContext->addDecl(New);
}
// If this is the C FILE type, notify the AST context.
if (IdentifierInfo *II = New->getIdentifier())
if (!New->isInvalidDecl() &&
New->getDeclContext()->getRedeclContext()->isTranslationUnit() &&
II->isStr("FILE"))
Context.setFILEDecl(New);
if (PrevDecl)
mergeDeclAttributes(New, PrevDecl);
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(New))
inferGslOwnerPointerAttribute(CXXRD);
// If there's a #pragma GCC visibility in scope, set the visibility of this
// record.
AddPushedVisibilityAttribute(New);
if (isMemberSpecialization && !New->isInvalidDecl())
CompleteMemberSpecialization(New, Previous);
OwnedDecl = true;
// In C++, don't return an invalid declaration. We can't recover well from
// the cases where we make the type anonymous.
if (Invalid && getLangOpts().CPlusPlus) {
if (New->isBeingDefined())
if (auto RD = dyn_cast<RecordDecl>(New))
RD->completeDefinition();
return true;
} else if (SkipBody && SkipBody->ShouldSkip) {
return SkipBody->Previous;
} else {
return New;
}
}
void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) {
AdjustDeclIfTemplate(TagD);
TagDecl *Tag = cast<TagDecl>(TagD);
// Enter the tag context.
PushDeclContext(S, Tag);
ActOnDocumentableDecl(TagD);
// If there's a #pragma GCC visibility in scope, set the visibility of this
// record.
AddPushedVisibilityAttribute(Tag);
}
bool Sema::ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody) {
if (!hasStructuralCompatLayout(Prev, SkipBody.New))
return false;
// Make the previous decl visible.
makeMergedDefinitionVisible(SkipBody.Previous);
return true;
}
void Sema::ActOnObjCContainerStartDefinition(ObjCContainerDecl *IDecl) {
assert(IDecl->getLexicalParent() == CurContext &&
"The next DeclContext should be lexically contained in the current one.");
CurContext = IDecl;
}
void Sema::ActOnStartCXXMemberDeclarations(Scope *S, Decl *TagD,
SourceLocation FinalLoc,
bool IsFinalSpelledSealed,
bool IsAbstract,
SourceLocation LBraceLoc) {
AdjustDeclIfTemplate(TagD);
CXXRecordDecl *Record = cast<CXXRecordDecl>(TagD);
FieldCollector->StartClass();
if (!Record->getIdentifier())
return;
if (IsAbstract)
Record->markAbstract();
if (FinalLoc.isValid()) {
Record->addAttr(FinalAttr::Create(
Context, FinalLoc, AttributeCommonInfo::AS_Keyword,
static_cast<FinalAttr::Spelling>(IsFinalSpelledSealed)));
}
// C++ [class]p2:
// [...] The class-name is also inserted into the scope of the
// class itself; this is known as the injected-class-name. For
// purposes of access checking, the injected-class-name is treated
// as if it were a public member name.
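// e.g.:
//   struct S { S *next; };  // 'S' inside the braces refers to the
//                           // injected-class-name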
CXXRecordDecl *InjectedClassName = CXXRecordDecl::Create(
Context, Record->getTagKind(), CurContext, Record->getBeginLoc(),
Record->getLocation(), Record->getIdentifier(),
/*PrevDecl=*/nullptr,
/*DelayTypeCreation=*/true);
Context.getTypeDeclType(InjectedClassName, Record);
InjectedClassName->setImplicit();
InjectedClassName->setAccess(AS_public);
if (ClassTemplateDecl *Template = Record->getDescribedClassTemplate())
InjectedClassName->setDescribedClassTemplate(Template);
PushOnScopeChains(InjectedClassName, S);
assert(InjectedClassName->isInjectedClassName() &&
"Broken injected-class-name");
}
void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD,
SourceRange BraceRange) {
AdjustDeclIfTemplate(TagD);
TagDecl *Tag = cast<TagDecl>(TagD);
Tag->setBraceRange(BraceRange);
// Make sure we "complete" the definition even if it is invalid.
if (Tag->isBeingDefined()) {
assert(Tag->isInvalidDecl() && "We should already have completed it");
if (RecordDecl *RD = dyn_cast<RecordDecl>(Tag))
RD->completeDefinition();
}
if (auto *RD = dyn_cast<CXXRecordDecl>(Tag)) {
FieldCollector->FinishClass();
if (RD->hasAttr<SYCLSpecialClassAttr>()) {
auto *Def = RD->getDefinition();
assert(Def && "The record is expected to have a completed definition");
unsigned NumInitMethods = 0;
for (auto *Method : Def->methods()) {
if (!Method->getIdentifier())
continue;
if (Method->getName() == "__init")
NumInitMethods++;
}
if (NumInitMethods > 1 || !Def->hasInitMethod())
Diag(RD->getLocation(), diag::err_sycl_special_type_num_init_method);
}
}
// Exit this scope of this tag's definition.
PopDeclContext();
if (getCurLexicalContext()->isObjCContainer() &&
Tag->getDeclContext()->isFileContext())
Tag->setTopLevelDeclInObjCContainer();
// Notify the consumer that we've defined a tag.
if (!Tag->isInvalidDecl())
Consumer.HandleTagDeclDefinition(Tag);
// Clang's implementation of #pragma align(packed) differs in bit-field
// layout from XL's and instead matches the XL #pragma pack(1) behavior.
if (Context.getTargetInfo().getTriple().isOSAIX() &&
AlignPackStack.hasValue()) {
AlignPackInfo APInfo = AlignPackStack.CurrentValue;
// Only diagnose #pragma align(packed).
if (!APInfo.IsAlignAttr() || APInfo.getAlignMode() != AlignPackInfo::Packed)
return;
const RecordDecl *RD = dyn_cast<RecordDecl>(Tag);
if (!RD)
return;
// Only warn if there is at least one bit-field member.
if (llvm::any_of(RD->fields(),
[](const FieldDecl *FD) { return FD->isBitField(); }))
Diag(BraceRange.getBegin(), diag::warn_pragma_align_not_xl_compatible);
}
}
void Sema::ActOnObjCContainerFinishDefinition() {
// Exit this scope of this interface definition.
PopDeclContext();
}
void Sema::ActOnObjCTemporaryExitContainerContext(ObjCContainerDecl *ObjCCtx) {
assert(ObjCCtx == CurContext && "Mismatch of container contexts");
OriginalLexicalContext = ObjCCtx;
ActOnObjCContainerFinishDefinition();
}
void Sema::ActOnObjCReenterContainerContext(ObjCContainerDecl *ObjCCtx) {
ActOnObjCContainerStartDefinition(ObjCCtx);
OriginalLexicalContext = nullptr;
}
void Sema::ActOnTagDefinitionError(Scope *S, Decl *TagD) {
AdjustDeclIfTemplate(TagD);
TagDecl *Tag = cast<TagDecl>(TagD);
Tag->setInvalidDecl();
// Make sure we "complete" the definition even if it is invalid.
if (Tag->isBeingDefined()) {
if (RecordDecl *RD = dyn_cast<RecordDecl>(Tag))
RD->completeDefinition();
}
// We're undoing ActOnTagStartDefinition here, not
// ActOnStartCXXMemberDeclarations, so we don't have to mess with
// the FieldCollector.
PopDeclContext();
}
// Note that FieldName may be null for anonymous bitfields.
ExprResult Sema::VerifyBitField(SourceLocation FieldLoc,
IdentifierInfo *FieldName, QualType FieldTy,
bool IsMsStruct, Expr *BitWidth) {
assert(BitWidth);
if (BitWidth->containsErrors())
return ExprError();
// C99 6.7.2.1p4 - verify the field type.
// C++ 9.6p3: A bit-field shall have integral or enumeration type.
if (!FieldTy->isDependentType() && !FieldTy->isIntegralOrEnumerationType()) {
// Handle incomplete and sizeless types with a specific error.
if (RequireCompleteSizedType(FieldLoc, FieldTy,
diag::err_field_incomplete_or_sizeless))
return ExprError();
if (FieldName)
return Diag(FieldLoc, diag::err_not_integral_type_bitfield)
<< FieldName << FieldTy << BitWidth->getSourceRange();
return Diag(FieldLoc, diag::err_not_integral_type_anon_bitfield)
<< FieldTy << BitWidth->getSourceRange();
} else if (DiagnoseUnexpandedParameterPack(const_cast<Expr *>(BitWidth),
UPPC_BitFieldWidth))
return ExprError();
// If the bit-width is type- or value-dependent, don't try to check
// it now.
if (BitWidth->isValueDependent() || BitWidth->isTypeDependent())
return BitWidth;
llvm::APSInt Value;
ExprResult ICE = VerifyIntegerConstantExpression(BitWidth, &Value, AllowFold);
if (ICE.isInvalid())
return ICE;
BitWidth = ICE.get();
// Zero-width bitfield is ok for anonymous field.
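// e.g.:
//   struct A { int : 0; };    // OK: unnamed zero-width bit-field
//   struct B { int b : 0; };  // error: named bit-field has zero width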
if (Value == 0 && FieldName)
return Diag(FieldLoc, diag::err_bitfield_has_zero_width) << FieldName;
if (Value.isSigned() && Value.isNegative()) {
if (FieldName)
return Diag(FieldLoc, diag::err_bitfield_has_negative_width)
<< FieldName << toString(Value, 10);
return Diag(FieldLoc, diag::err_anon_bitfield_has_negative_width)
<< toString(Value, 10);
}
// The size of the bit-field must not exceed our maximum permitted object
// size.
if (Value.getActiveBits() > ConstantArrayType::getMaxSizeBits(Context)) {
return Diag(FieldLoc, diag::err_bitfield_too_wide)
<< !FieldName << FieldName << toString(Value, 10);
}
if (!FieldTy->isDependentType()) {
uint64_t TypeStorageSize = Context.getTypeSize(FieldTy);
uint64_t TypeWidth = Context.getIntWidth(FieldTy);
bool BitfieldIsOverwide = Value.ugt(TypeWidth);
// Over-wide bitfields are an error in C or when using the MSVC bitfield
// ABI.
bool CStdConstraintViolation =
BitfieldIsOverwide && !getLangOpts().CPlusPlus;
bool MSBitfieldViolation =
Value.ugt(TypeStorageSize) &&
(IsMsStruct || Context.getTargetInfo().getCXXABI().isMicrosoft());
if (CStdConstraintViolation || MSBitfieldViolation) {
unsigned DiagWidth =
CStdConstraintViolation ? TypeWidth : TypeStorageSize;
return Diag(FieldLoc, diag::err_bitfield_width_exceeds_type_width)
<< (bool)FieldName << FieldName << toString(Value, 10)
<< !CStdConstraintViolation << DiagWidth;
}
// Warn on types where the user might conceivably expect to get all
// specified bits as value bits: that's all integral types other than
// 'bool'.
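// e.g. (assuming a 32-bit 'unsigned'):
//   struct S { unsigned n : 40; };  // warning: value truncated to the
//                                   // 32 value bits of the type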
if (BitfieldIsOverwide && !FieldTy->isBooleanType() && FieldName) {
Diag(FieldLoc, diag::warn_bitfield_width_exceeds_type_width)
<< FieldName << toString(Value, 10)
<< (unsigned)TypeWidth;
}
}
return BitWidth;
}
/// ActOnField - Each field of a C struct/union is passed into this in order
/// to create a FieldDecl object for it.
Decl *Sema::ActOnField(Scope *S, Decl *TagD, SourceLocation DeclStart,
Declarator &D, Expr *BitfieldWidth) {
FieldDecl *Res = HandleField(S, cast_or_null<RecordDecl>(TagD),
DeclStart, D, static_cast<Expr*>(BitfieldWidth),
/*InitStyle=*/ICIS_NoInit, AS_public);
return Res;
}
/// HandleField - Analyze a field of a C struct or a C++ data member.
///
FieldDecl *Sema::HandleField(Scope *S, RecordDecl *Record,
SourceLocation DeclStart,
Declarator &D, Expr *BitWidth,
InClassInitStyle InitStyle,
AccessSpecifier AS) {
if (D.isDecompositionDeclarator()) {
const DecompositionDeclarator &Decomp = D.getDecompositionDeclarator();
Diag(Decomp.getLSquareLoc(), diag::err_decomp_decl_context)
<< Decomp.getSourceRange();
return nullptr;
}
IdentifierInfo *II = D.getIdentifier();
SourceLocation Loc = DeclStart;
if (II) Loc = D.getIdentifierLoc();
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType T = TInfo->getType();
if (getLangOpts().CPlusPlus) {
CheckExtraCXXDefaultArguments(D);
if (DiagnoseUnexpandedParameterPack(D.getIdentifierLoc(), TInfo,
UPPC_DataMemberType)) {
D.setInvalidType();
T = Context.IntTy;
TInfo = Context.getTrivialTypeSourceInfo(T, Loc);
}
}
DiagnoseFunctionSpecifiers(D.getDeclSpec());
if (D.getDeclSpec().isInlineSpecified())
Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
<< getLangOpts().CPlusPlus17;
if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_invalid_thread)
<< DeclSpec::getSpecifierName(TSCS);
// Check to see if this name was declared as a member previously
NamedDecl *PrevDecl = nullptr;
LookupResult Previous(*this, II, Loc, LookupMemberName,
ForVisibleRedeclaration);
LookupName(Previous, S);
switch (Previous.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundUnresolvedValue:
PrevDecl = Previous.getAsSingle<NamedDecl>();
break;
case LookupResult::FoundOverloaded:
PrevDecl = Previous.getRepresentativeDecl();
break;
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
break;
}
Previous.suppressDiagnostics();
if (PrevDecl && PrevDecl->isTemplateParameter()) {
// Maybe we will complain about the shadowed template parameter.
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(), PrevDecl);
// Just pretend that we didn't see the previous declaration.
PrevDecl = nullptr;
}
if (PrevDecl && !isDeclInScope(PrevDecl, Record, S))
PrevDecl = nullptr;
bool Mutable
= (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_mutable);
SourceLocation TSSL = D.getBeginLoc();
FieldDecl *NewFD
= CheckFieldDecl(II, T, TInfo, Record, Loc, Mutable, BitWidth, InitStyle,
TSSL, AS, PrevDecl, &D);
if (NewFD->isInvalidDecl())
Record->setInvalidDecl();
if (D.getDeclSpec().isModulePrivateSpecified())
NewFD->setModulePrivate();
if (NewFD->isInvalidDecl() && PrevDecl) {
// Don't introduce NewFD into scope; there's already something
// with the same name in the same scope.
} else if (II) {
PushOnScopeChains(NewFD, S);
} else
Record->addDecl(NewFD);
return NewFD;
}
/// Build a new FieldDecl and check its well-formedness.
///
/// This routine builds a new FieldDecl given the field's name, type,
/// record, etc. \p PrevDecl should refer to any previous declaration
/// with the same name and in the same scope as the field to be
/// created.
///
/// \returns a new FieldDecl.
///
/// \todo The Declarator argument is a hack. It will be removed once
FieldDecl *Sema::CheckFieldDecl(DeclarationName Name, QualType T,
TypeSourceInfo *TInfo,
RecordDecl *Record, SourceLocation Loc,
bool Mutable, Expr *BitWidth,
InClassInitStyle InitStyle,
SourceLocation TSSL,
AccessSpecifier AS, NamedDecl *PrevDecl,
Declarator *D) {
IdentifierInfo *II = Name.getAsIdentifierInfo();
bool InvalidDecl = false;
if (D) InvalidDecl = D->isInvalidType();
// If we receive a broken type, recover by assuming 'int' and
// marking this declaration as invalid.
if (T.isNull() || T->containsErrors()) {
InvalidDecl = true;
T = Context.IntTy;
}
QualType EltTy = Context.getBaseElementType(T);
if (!EltTy->isDependentType() && !EltTy->containsErrors()) {
if (RequireCompleteSizedType(Loc, EltTy,
diag::err_field_incomplete_or_sizeless)) {
// Fields of incomplete type force their record to be invalid.
Record->setInvalidDecl();
InvalidDecl = true;
} else {
NamedDecl *Def;
EltTy->isIncompleteType(&Def);
if (Def && Def->isInvalidDecl()) {
Record->setInvalidDecl();
InvalidDecl = true;
}
}
}
// TR 18037 does not allow fields to be declared with an address space.
if (T.hasAddressSpace() || T->isDependentAddressSpaceType() ||
T->getBaseElementTypeUnsafe()->isDependentAddressSpaceType()) {
Diag(Loc, diag::err_field_with_address_space);
Record->setInvalidDecl();
InvalidDecl = true;
}
if (LangOpts.OpenCL) {
// OpenCL v1.2 s6.9b,r & OpenCL v2.0 s6.12.5 - The following types cannot be
// used as structure or union field: image, sampler, event or block types.
if (T->isEventT() || T->isImageType() || T->isSamplerT() ||
T->isBlockPointerType()) {
Diag(Loc, diag::err_opencl_type_struct_or_union_field) << T;
Record->setInvalidDecl();
InvalidDecl = true;
}
// OpenCL v1.2 s6.9.c: bitfields are not supported, unless Clang extension
// is enabled.
if (BitWidth && !getOpenCLOptions().isAvailableOption(
"__cl_clang_bitfields", LangOpts)) {
Diag(Loc, diag::err_opencl_bitfields);
InvalidDecl = true;
}
}
// Anonymous bit-fields cannot be cv-qualified (CWG 2229).
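// e.g.:
//   struct S { const int : 4; };  // error: anonymous bit-field cannot
//                                 // have qualifiers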
if (!InvalidDecl && getLangOpts().CPlusPlus && !II && BitWidth &&
T.hasQualifiers()) {
InvalidDecl = true;
Diag(Loc, diag::err_anon_bitfield_qualifiers);
}
// C99 6.7.2.1p8: A member of a structure or union may have any type other
// than a variably modified type.
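// e.g.:
//   void f(int n) {
//     struct S { int a[n]; };  // error: fields must have a constant size
//   }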
if (!InvalidDecl && T->isVariablyModifiedType()) {
if (!tryToFixVariablyModifiedVarType(
TInfo, T, Loc, diag::err_typecheck_field_variable_size))
InvalidDecl = true;
}
// Fields cannot have abstract class types.
if (!InvalidDecl && RequireNonAbstractType(Loc, T,
diag::err_abstract_type_in_decl,
AbstractFieldType))
InvalidDecl = true;
if (InvalidDecl)
BitWidth = nullptr;
// If this is declared as a bit-field, check the bit-field.
if (BitWidth) {
BitWidth =
VerifyBitField(Loc, II, T, Record->isMsStruct(Context), BitWidth).get();
if (!BitWidth) {
InvalidDecl = true;
BitWidth = nullptr;
}
}
// Check that 'mutable' is consistent with the type of the declaration.
if (!InvalidDecl && Mutable) {
unsigned DiagID = 0;
if (T->isReferenceType())
DiagID = getLangOpts().MSVCCompat ? diag::ext_mutable_reference
: diag::err_mutable_reference;
else if (T.isConstQualified())
DiagID = diag::err_mutable_const;
if (DiagID) {
SourceLocation ErrLoc = Loc;
if (D && D->getDeclSpec().getStorageClassSpecLoc().isValid())
ErrLoc = D->getDeclSpec().getStorageClassSpecLoc();
Diag(ErrLoc, DiagID);
if (DiagID != diag::ext_mutable_reference) {
Mutable = false;
InvalidDecl = true;
}
}
}
// C++11 [class.union]p8 (DR1460):
// At most one variant member of a union may have a
// brace-or-equal-initializer.
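// e.g.:
//   union U { int a = 1; int b = 2; };  // error: at most one variant
//                                       // member may have an initializer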
if (InitStyle != ICIS_NoInit)
checkDuplicateDefaultInit(*this, cast<CXXRecordDecl>(Record), Loc);
FieldDecl *NewFD = FieldDecl::Create(Context, Record, TSSL, Loc, II, T, TInfo,
BitWidth, Mutable, InitStyle);
if (InvalidDecl)
NewFD->setInvalidDecl();
if (PrevDecl && !isa<TagDecl>(PrevDecl)) {
Diag(Loc, diag::err_duplicate_member) << II;
Diag(PrevDecl->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
}
if (!InvalidDecl && getLangOpts().CPlusPlus) {
if (Record->isUnion()) {
if (const RecordType *RT = EltTy->getAs<RecordType>()) {
CXXRecordDecl* RDecl = cast<CXXRecordDecl>(RT->getDecl());
if (RDecl->getDefinition()) {
// C++ [class.union]p1: An object of a class with a non-trivial
// constructor, a non-trivial copy constructor, a non-trivial
// destructor, or a non-trivial copy assignment operator
// cannot be a member of a union, nor can an array of such
// objects.
if (CheckNontrivialField(NewFD))
NewFD->setInvalidDecl();
}
}
// C++ [class.union]p1: If a union contains a member of reference type,
// the program is ill-formed, except when compiling with MSVC extensions
// enabled.
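// e.g.:
//   union U { int &r; };  // ill-formed; accepted with MSVC extensions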
if (EltTy->isReferenceType()) {
Diag(NewFD->getLocation(), getLangOpts().MicrosoftExt ?
diag::ext_union_member_of_reference_type :
diag::err_union_member_of_reference_type)
<< NewFD->getDeclName() << EltTy;
if (!getLangOpts().MicrosoftExt)
NewFD->setInvalidDecl();
}
}
}
// FIXME: We need to pass in the attributes given an AST
// representation, not a parser representation.
if (D) {
// FIXME: The current scope is almost... but not entirely... correct here.
ProcessDeclAttributes(getCurScope(), NewFD, *D);
if (NewFD->hasAttrs())
CheckAlignasUnderalignment(NewFD);
}
// In auto-retain/release, infer strong retention for fields of
// retainable type.
if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(NewFD))
NewFD->setInvalidDecl();
if (T.isObjCGCWeak())
Diag(Loc, diag::warn_attribute_weak_on_field);
// PPC MMA non-pointer types are not allowed as field types.
if (Context.getTargetInfo().getTriple().isPPC64() &&
CheckPPCMMAType(T, NewFD->getLocation()))
NewFD->setInvalidDecl();
NewFD->setAccess(AS);
return NewFD;
}
bool Sema::CheckNontrivialField(FieldDecl *FD) {
assert(FD);
assert(getLangOpts().CPlusPlus && "valid check only for C++");
if (FD->isInvalidDecl() || FD->getType()->isDependentType())
return false;
QualType EltTy = Context.getBaseElementType(FD->getType());
if (const RecordType *RT = EltTy->getAs<RecordType>()) {
CXXRecordDecl *RDecl = cast<CXXRecordDecl>(RT->getDecl());
if (RDecl->getDefinition()) {
// We check for copy constructors before constructors
// because otherwise we'll never get complaints about
// copy constructors.
CXXSpecialMember member = CXXInvalid;
// We're required to check for any non-trivial constructors. Since the
// implicit default constructor is suppressed if there are any
// user-declared constructors, we just need to check that there is a
// trivial default constructor and a trivial copy constructor. (We don't
// worry about move constructors here, since this is a C++98 check.)
if (RDecl->hasNonTrivialCopyConstructor())
member = CXXCopyConstructor;
else if (!RDecl->hasTrivialDefaultConstructor())
member = CXXDefaultConstructor;
else if (RDecl->hasNonTrivialCopyAssignment())
member = CXXCopyAssignment;
else if (RDecl->hasNonTrivialDestructor())
member = CXXDestructor;
if (member != CXXInvalid) {
if (!getLangOpts().CPlusPlus11 &&
getLangOpts().ObjCAutoRefCount && RDecl->hasObjectMember()) {
// Objective-C++ ARC: it is an error to have a non-trivial field of
// a union. However, system headers in Objective-C programs
// occasionally have Objective-C lifetime objects within unions,
// and rather than cause the program to fail, we make those
// members unavailable.
SourceLocation Loc = FD->getLocation();
if (getSourceManager().isInSystemHeader(Loc)) {
if (!FD->hasAttr<UnavailableAttr>())
FD->addAttr(UnavailableAttr::CreateImplicit(Context, "",
UnavailableAttr::IR_ARCFieldWithOwnership, Loc));
return false;
}
}
Diag(FD->getLocation(), getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_nontrivial_union_or_anon_struct_member :
diag::err_illegal_union_or_anon_struct_member)
<< FD->getParent()->isUnion() << FD->getDeclName() << member;
DiagnoseNontrivial(RDecl, member);
return !getLangOpts().CPlusPlus11;
}
}
}
return false;
}
/// TranslateIvarVisibility - Translate visibility from a token ID to an
/// AST enum value.
static ObjCIvarDecl::AccessControl
TranslateIvarVisibility(tok::ObjCKeywordKind ivarVisibility) {
switch (ivarVisibility) {
default: llvm_unreachable("Unknown visibility kind");
case tok::objc_private: return ObjCIvarDecl::Private;
case tok::objc_public: return ObjCIvarDecl::Public;
case tok::objc_protected: return ObjCIvarDecl::Protected;
case tok::objc_package: return ObjCIvarDecl::Package;
}
}
/// ActOnIvar - Each ivar field of an objective-c class is passed into this
/// in order to create an IvarDecl object for it.
Decl *Sema::ActOnIvar(Scope *S,
SourceLocation DeclStart,
Declarator &D, Expr *BitfieldWidth,
tok::ObjCKeywordKind Visibility) {
IdentifierInfo *II = D.getIdentifier();
Expr *BitWidth = (Expr*)BitfieldWidth;
SourceLocation Loc = DeclStart;
if (II) Loc = D.getIdentifierLoc();
// FIXME: Unnamed fields can be handled in various different ways, for
// example, unnamed unions inject all members into the struct namespace!
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType T = TInfo->getType();
if (BitWidth) {
// 6.7.2.1p3, 6.7.2.1p4
BitWidth = VerifyBitField(Loc, II, T, /*IsMsStruct*/false, BitWidth).get();
if (!BitWidth)
D.setInvalidType();
} else {
// Not a bitfield.
// validate II.
}
if (T->isReferenceType()) {
Diag(Loc, diag::err_ivar_reference_type);
D.setInvalidType();
}
// C99 6.7.2.1p8: A member of a structure or union may have any type other
// than a variably modified type.
else if (T->isVariablyModifiedType()) {
if (!tryToFixVariablyModifiedVarType(
TInfo, T, Loc, diag::err_typecheck_ivar_variable_size))
D.setInvalidType();
}
// Get the visibility (access control) for this ivar.
ObjCIvarDecl::AccessControl ac =
Visibility != tok::objc_not_keyword ? TranslateIvarVisibility(Visibility)
: ObjCIvarDecl::None;
// Must set ivar's DeclContext to its enclosing interface.
ObjCContainerDecl *EnclosingDecl = cast<ObjCContainerDecl>(CurContext);
if (!EnclosingDecl || EnclosingDecl->isInvalidDecl())
return nullptr;
ObjCContainerDecl *EnclosingContext;
if (ObjCImplementationDecl *IMPDecl =
dyn_cast<ObjCImplementationDecl>(EnclosingDecl)) {
if (LangOpts.ObjCRuntime.isFragile()) {
// Case of ivar declared in an implementation. Context is that of its class.
EnclosingContext = IMPDecl->getClassInterface();
assert(EnclosingContext && "Implementation has no class interface!");
}
else
EnclosingContext = EnclosingDecl;
} else {
if (ObjCCategoryDecl *CDecl =
dyn_cast<ObjCCategoryDecl>(EnclosingDecl)) {
if (LangOpts.ObjCRuntime.isFragile() || !CDecl->IsClassExtension()) {
Diag(Loc, diag::err_misplaced_ivar) << CDecl->IsClassExtension();
return nullptr;
}
}
EnclosingContext = EnclosingDecl;
}
// Construct the decl.
ObjCIvarDecl *NewID = ObjCIvarDecl::Create(Context, EnclosingContext,
DeclStart, Loc, II, T,
TInfo, ac, (Expr *)BitfieldWidth);
if (II) {
NamedDecl *PrevDecl = LookupSingleName(S, II, Loc, LookupMemberName,
ForVisibleRedeclaration);
if (PrevDecl && isDeclInScope(PrevDecl, EnclosingContext, S)
&& !isa<TagDecl>(PrevDecl)) {
Diag(Loc, diag::err_duplicate_member) << II;
Diag(PrevDecl->getLocation(), diag::note_previous_declaration);
NewID->setInvalidDecl();
}
}
// Process attributes attached to the ivar.
ProcessDeclAttributes(S, NewID, D);
if (D.isInvalidType())
NewID->setInvalidDecl();
// In ARC, infer 'retaining' for ivars of retainable type.
if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(NewID))
NewID->setInvalidDecl();
if (D.getDeclSpec().isModulePrivateSpecified())
NewID->setModulePrivate();
if (II) {
// FIXME: When interfaces are DeclContexts, we'll need to add
// these to the interface.
S->AddDecl(NewID);
IdResolver.AddDecl(NewID);
}
if (LangOpts.ObjCRuntime.isNonFragile() &&
!NewID->isInvalidDecl() && isa<ObjCInterfaceDecl>(EnclosingDecl))
Diag(Loc, diag::warn_ivars_in_interface);
return NewID;
}
/// ActOnLastBitfield - This routine handles synthesized bitfields rules for
/// class and class extensions. For every class \@interface and class
/// extension \@interface, if the last ivar is a bitfield of any type,
/// then add an implicit `char :0` ivar to the end of that interface.
void Sema::ActOnLastBitfield(SourceLocation DeclLoc,
SmallVectorImpl<Decl *> &AllIvarDecls) {
if (LangOpts.ObjCRuntime.isFragile() || AllIvarDecls.empty())
return;
Decl *ivarDecl = AllIvarDecls[AllIvarDecls.size()-1];
ObjCIvarDecl *Ivar = cast<ObjCIvarDecl>(ivarDecl);
if (!Ivar->isBitField() || Ivar->isZeroLengthBitField(Context))
return;
ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(CurContext);
if (!ID) {
if (ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(CurContext)) {
if (!CD->IsClassExtension())
return;
}
// No need to add this to the end of an @implementation.
else
return;
}
// All conditions are met. Add a new bitfield to the tail end of ivars.
llvm::APInt Zero(Context.getTypeSize(Context.IntTy), 0);
Expr * BW = IntegerLiteral::Create(Context, Zero, Context.IntTy, DeclLoc);
Ivar = ObjCIvarDecl::Create(Context, cast<ObjCContainerDecl>(CurContext),
DeclLoc, DeclLoc, nullptr,
Context.CharTy,
Context.getTrivialTypeSourceInfo(Context.CharTy,
DeclLoc),
ObjCIvarDecl::Private, BW,
true);
AllIvarDecls.push_back(Ivar);
}
/// [class.dtor]p4:
/// At the end of the definition of a class, overload resolution is
/// performed among the prospective destructors declared in that class with
/// an empty argument list to select the destructor for the class, also
/// known as the selected destructor.
///
/// We do the overload resolution here, then mark the selected destructor in
/// the AST. Later CXXRecordDecl::getDestructor() will return the selected
/// destructor.
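///
/// e.g. (C++20):
///   template <class T> struct A {
///     ~A() requires (sizeof(T) == 4) {}  // prospective destructors; one
///     ~A() {}                            // is selected by overload resolution
///   };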
static void ComputeSelectedDestructor(Sema &S, CXXRecordDecl *Record) {
if (!Record->hasUserDeclaredDestructor()) {
return;
}
SourceLocation Loc = Record->getLocation();
OverloadCandidateSet OCS(Loc, OverloadCandidateSet::CSK_Normal);
for (auto *Decl : Record->decls()) {
if (auto *DD = dyn_cast<CXXDestructorDecl>(Decl)) {
if (DD->isInvalidDecl())
continue;
S.AddOverloadCandidate(DD, DeclAccessPair::make(DD, DD->getAccess()), {},
OCS);
assert(DD->isIneligibleOrNotSelected() &&
       "Selecting a destructor but a destructor was already selected.");
}
}
if (OCS.empty()) {
return;
}
OverloadCandidateSet::iterator Best;
unsigned Msg = 0;
OverloadCandidateDisplayKind DisplayKind;
switch (OCS.BestViableFunction(S, Loc, Best)) {
case OR_Success:
case OR_Deleted:
Record->addedSelectedDestructor(dyn_cast<CXXDestructorDecl>(Best->Function));
break;
case OR_Ambiguous:
Msg = diag::err_ambiguous_destructor;
DisplayKind = OCD_AmbiguousCandidates;
break;
case OR_No_Viable_Function:
Msg = diag::err_no_viable_destructor;
DisplayKind = OCD_AllCandidates;
break;
}
if (Msg) {
// OpenCL has its own thing going with destructors. It's slightly broken,
// but we allow it.
if (!S.LangOpts.OpenCL) {
PartialDiagnostic Diag = S.PDiag(Msg) << Record;
OCS.NoteCandidates(PartialDiagnosticAt(Loc, Diag), S, DisplayKind, {});
Record->setInvalidDecl();
}
// It's a bit hacky: At this point we've raised an error but we want the
// rest of the compiler to keep working somehow. However, almost
// everything we'll try to do with the class will depend on there being a
// destructor. So let's pretend the first one is selected and hope for the
// best.
Record->addedSelectedDestructor(
    dyn_cast<CXXDestructorDecl>(OCS.begin()->Function));
}
}
/// [class.mem.special]p5
/// Two special member functions are of the same kind if:
/// - they are both default constructors,
/// - they are both copy or move constructors with the same first parameter
/// type, or
/// - they are both copy or move assignment operators with the same first
/// parameter type and the same cv-qualifiers and ref-qualifier, if any.
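///
/// e.g., 'S(const S &)' and 'S(const S &) requires C' are the same kind,
/// while 'S(S &&)' is a different kind.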
static bool AreSpecialMemberFunctionsSameKind(ASTContext &Context,
CXXMethodDecl *M1,
CXXMethodDecl *M2,
Sema::CXXSpecialMember CSM) {
// We don't want to compare templates to non-templates: See
// https://github.com/llvm/llvm-project/issues/59206
if (CSM == Sema::CXXDefaultConstructor)
return bool(M1->getDescribedFunctionTemplate()) ==
bool(M2->getDescribedFunctionTemplate());
if (!Context.hasSameType(M1->getParamDecl(0)->getType(),
M2->getParamDecl(0)->getType()))
return false;
if (!Context.hasSameType(M1->getThisType(), M2->getThisType()))
return false;
return true;
}
/// [class.mem.special]p6:
/// An eligible special member function is a special member function for which:
/// - the function is not deleted,
/// - the associated constraints, if any, are satisfied, and
/// - no special member function of the same kind whose associated constraints
/// [CWG2595], if any, are satisfied is more constrained.
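///
/// e.g. (C++20):
///   template <class T> struct S {
///     S(const S &) requires true;  // more constrained: eligible
///     S(const S &);                // ineligible while both are satisfied
///   };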
static void SetEligibleMethods(Sema &S, CXXRecordDecl *Record,
ArrayRef<CXXMethodDecl *> Methods,
Sema::CXXSpecialMember CSM) {
SmallVector<bool, 4> SatisfactionStatus;
for (CXXMethodDecl *Method : Methods) {
const Expr *Constraints = Method->getTrailingRequiresClause();
if (!Constraints)
SatisfactionStatus.push_back(true);
else {
ConstraintSatisfaction Satisfaction;
if (S.CheckFunctionConstraints(Method, Satisfaction))
SatisfactionStatus.push_back(false);
else
SatisfactionStatus.push_back(Satisfaction.IsSatisfied);
}
}
for (size_t i = 0; i < Methods.size(); i++) {
if (!SatisfactionStatus[i])
continue;
CXXMethodDecl *Method = Methods[i];
CXXMethodDecl *OrigMethod = Method;
if (FunctionDecl *MF = OrigMethod->getInstantiatedFromMemberFunction())
OrigMethod = cast<CXXMethodDecl>(MF);
const Expr *Constraints = OrigMethod->getTrailingRequiresClause();
bool AnotherMethodIsMoreConstrained = false;
for (size_t j = 0; j < Methods.size(); j++) {
if (i == j || !SatisfactionStatus[j])
continue;
CXXMethodDecl *OtherMethod = Methods[j];
if (FunctionDecl *MF = OtherMethod->getInstantiatedFromMemberFunction())
OtherMethod = cast<CXXMethodDecl>(MF);
if (!AreSpecialMemberFunctionsSameKind(S.Context, OrigMethod, OtherMethod,
CSM))
continue;
const Expr *OtherConstraints = OtherMethod->getTrailingRequiresClause();
if (!OtherConstraints)
continue;
if (!Constraints) {
AnotherMethodIsMoreConstrained = true;
break;
}
if (S.IsAtLeastAsConstrained(OtherMethod, {OtherConstraints}, OrigMethod,
{Constraints},
AnotherMethodIsMoreConstrained)) {
// There was an error with the constraints comparison. Exit the loop
// and don't consider this function eligible.
AnotherMethodIsMoreConstrained = true;
}
if (AnotherMethodIsMoreConstrained)
break;
}
// FIXME: Do not consider deleted methods as eligible after implementing
// DR1734 and DR1496.
if (!AnotherMethodIsMoreConstrained) {
Method->setIneligibleOrNotSelected(false);
Record->addedEligibleSpecialMemberFunction(Method, 1 << CSM);
}
}
}
static void ComputeSpecialMemberFunctionsEligiblity(Sema &S,
CXXRecordDecl *Record) {
SmallVector<CXXMethodDecl *, 4> DefaultConstructors;
SmallVector<CXXMethodDecl *, 4> CopyConstructors;
SmallVector<CXXMethodDecl *, 4> MoveConstructors;
SmallVector<CXXMethodDecl *, 4> CopyAssignmentOperators;
SmallVector<CXXMethodDecl *, 4> MoveAssignmentOperators;
for (auto *Decl : Record->decls()) {
auto *MD = dyn_cast<CXXMethodDecl>(Decl);
if (!MD) {
auto *FTD = dyn_cast<FunctionTemplateDecl>(Decl);
if (FTD)
MD = dyn_cast<CXXMethodDecl>(FTD->getTemplatedDecl());
}
if (!MD)
continue;
if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
if (CD->isInvalidDecl())
continue;
if (CD->isDefaultConstructor())
DefaultConstructors.push_back(MD);
else if (CD->isCopyConstructor())
CopyConstructors.push_back(MD);
else if (CD->isMoveConstructor())
MoveConstructors.push_back(MD);
} else if (MD->isCopyAssignmentOperator()) {
CopyAssignmentOperators.push_back(MD);
} else if (MD->isMoveAssignmentOperator()) {
MoveAssignmentOperators.push_back(MD);
}
}
SetEligibleMethods(S, Record, DefaultConstructors,
Sema::CXXDefaultConstructor);
SetEligibleMethods(S, Record, CopyConstructors, Sema::CXXCopyConstructor);
SetEligibleMethods(S, Record, MoveConstructors, Sema::CXXMoveConstructor);
SetEligibleMethods(S, Record, CopyAssignmentOperators,
Sema::CXXCopyAssignment);
SetEligibleMethods(S, Record, MoveAssignmentOperators,
Sema::CXXMoveAssignment);
}
void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
ArrayRef<Decl *> Fields, SourceLocation LBrac,
SourceLocation RBrac,
const ParsedAttributesView &Attrs) {
assert(EnclosingDecl && "missing record or interface decl");
// If this is an Objective-C @implementation or category and we have
// new fields here we should reset the layout of the interface since
// it will now change.
if (!Fields.empty() && isa<ObjCContainerDecl>(EnclosingDecl)) {
ObjCContainerDecl *DC = cast<ObjCContainerDecl>(EnclosingDecl);
switch (DC->getKind()) {
default: break;
case Decl::ObjCCategory:
Context.ResetObjCLayout(cast<ObjCCategoryDecl>(DC)->getClassInterface());
break;
case Decl::ObjCImplementation:
Context.
ResetObjCLayout(cast<ObjCImplementationDecl>(DC)->getClassInterface());
break;
}
}
RecordDecl *Record = dyn_cast<RecordDecl>(EnclosingDecl);
CXXRecordDecl *CXXRecord = dyn_cast<CXXRecordDecl>(EnclosingDecl);
// Start counting up the number of named members; make sure to include
// members of anonymous structs and unions in the total.
unsigned NumNamedMembers = 0;
if (Record) {
for (const auto *I : Record->decls()) {
if (const auto *IFD = dyn_cast<IndirectFieldDecl>(I))
if (IFD->getDeclName())
++NumNamedMembers;
}
}
// Verify that all the fields are okay.
SmallVector<FieldDecl*, 32> RecFields;
for (ArrayRef<Decl *>::iterator i = Fields.begin(), end = Fields.end();
i != end; ++i) {
FieldDecl *FD = cast<FieldDecl>(*i);
// Get the type for the field.
const Type *FDTy = FD->getType().getTypePtr();
if (!FD->isAnonymousStructOrUnion()) {
// Remember all fields written by the user.
RecFields.push_back(FD);
}
// If the field is already invalid for some reason, don't emit more
// diagnostics about it.
if (FD->isInvalidDecl()) {
EnclosingDecl->setInvalidDecl();
continue;
}
// C99 6.7.2.1p2:
// A structure or union shall not contain a member with
// incomplete or function type (hence, a structure shall not
// contain an instance of itself, but may contain a pointer to
// an instance of itself), except that the last member of a
// structure with more than one named member may have incomplete
// array type; such a structure (and any union containing,
// possibly recursively, a member that is such a structure)
// shall not be a member of a structure or an element of an
// array.
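// e.g.:
//   struct S { int n; int data[]; };  // OK: flexible array member
//   struct T { int data[]; };         // diagnosed: no other named member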
bool IsLastField = (i + 1 == Fields.end());
if (FDTy->isFunctionType()) {
// Field declared as a function.
Diag(FD->getLocation(), diag::err_field_declared_as_function)
<< FD->getDeclName();
FD->setInvalidDecl();
EnclosingDecl->setInvalidDecl();
continue;
} else if (FDTy->isIncompleteArrayType() &&
(Record || isa<ObjCContainerDecl>(EnclosingDecl))) {
if (Record) {
// Flexible array member.
// Microsoft and g++ are more permissive regarding flexible arrays:
// they accept a flexible array in a union and also
// as the sole element of a struct/class.
unsigned DiagID = 0;
if (!Record->isUnion() && !IsLastField) {
Diag(FD->getLocation(), diag::err_flexible_array_not_at_end)
<< FD->getDeclName() << FD->getType() << Record->getTagKind();
Diag((*(i + 1))->getLocation(), diag::note_next_field_declaration);
FD->setInvalidDecl();
EnclosingDecl->setInvalidDecl();
continue;
} else if (Record->isUnion())
DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
: getLangOpts().CPlusPlus
? diag::ext_flexible_array_union_gnu
: diag::err_flexible_array_union;
else if (NumNamedMembers < 1)
DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
: getLangOpts().CPlusPlus
? diag::ext_flexible_array_empty_aggregate_gnu
: diag::err_flexible_array_empty_aggregate;
if (DiagID)
Diag(FD->getLocation(), DiagID) << FD->getDeclName()
<< Record->getTagKind();
// While the layout of types that contain virtual bases is not specified
// by the C++ standard, both the Itanium and Microsoft C++ ABIs place
// virtual bases after the derived members. This would make a flexible
// array member declared at the end of an object not adjacent to the end
// of the type.
if (CXXRecord && CXXRecord->getNumVBases() != 0)
Diag(FD->getLocation(), diag::err_flexible_array_virtual_base)
<< FD->getDeclName() << Record->getTagKind();
if (!getLangOpts().C99)
Diag(FD->getLocation(), diag::ext_c99_flexible_array_member)
<< FD->getDeclName() << Record->getTagKind();
// If the element type has a non-trivial destructor, we would not
// implicitly destroy the elements, so disallow it for now.
//
// FIXME: GCC allows this. We should probably either implicitly delete
// the destructor of the containing class, or just allow this.
QualType BaseElem = Context.getBaseElementType(FD->getType());
if (!BaseElem->isDependentType() && BaseElem.isDestructedType()) {
Diag(FD->getLocation(), diag::err_flexible_array_has_nontrivial_dtor)
<< FD->getDeclName() << FD->getType();
FD->setInvalidDecl();
EnclosingDecl->setInvalidDecl();
continue;
}
// Okay, we have a legal flexible array member at the end of the struct.
Record->setHasFlexibleArrayMember(true);
} else {
// In an ObjCContainerDecl, ivars with incomplete array type are accepted
// unless they are followed by another ivar. That check is done elsewhere,
// after synthesized ivars are known.
}
} else if (!FDTy->isDependentType() &&
RequireCompleteSizedType(
FD->getLocation(), FD->getType(),
diag::err_field_incomplete_or_sizeless)) {
// Incomplete type
FD->setInvalidDecl();
EnclosingDecl->setInvalidDecl();
continue;
} else if (const RecordType *FDTTy = FDTy->getAs<RecordType>()) {
if (Record && FDTTy->getDecl()->hasFlexibleArrayMember()) {
// A type which contains a flexible array member is considered to be a
// flexible array member.
Record->setHasFlexibleArrayMember(true);
if (!Record->isUnion()) {
// If this is a struct/class and this is not the last element, reject
// it. Note that GCC supports variable sized arrays in the middle of
// structures.
if (!IsLastField)
Diag(FD->getLocation(), diag::ext_variable_sized_type_in_struct)
<< FD->getDeclName() << FD->getType();
else {
// We support flexible arrays at the end of structs in
// other structs as an extension.
Diag(FD->getLocation(), diag::ext_flexible_array_in_struct)
<< FD->getDeclName();
}
}
}
if (isa<ObjCContainerDecl>(EnclosingDecl) &&
RequireNonAbstractType(FD->getLocation(), FD->getType(),
diag::err_abstract_type_in_decl,
AbstractIvarType)) {
// Ivars cannot have abstract class types
FD->setInvalidDecl();
}
if (Record && FDTTy->getDecl()->hasObjectMember())
Record->setHasObjectMember(true);
if (Record && FDTTy->getDecl()->hasVolatileMember())
Record->setHasVolatileMember(true);
} else if (FDTy->isObjCObjectType()) {
// A field cannot be an Objective-C object
Diag(FD->getLocation(), diag::err_statically_allocated_object)
<< FixItHint::CreateInsertion(FD->getLocation(), "*");
QualType T = Context.getObjCObjectPointerType(FD->getType());
FD->setType(T);
} else if (Record && Record->isUnion() &&
FD->getType().hasNonTrivialObjCLifetime() &&
getSourceManager().isInSystemHeader(FD->getLocation()) &&
!getLangOpts().CPlusPlus && !FD->hasAttr<UnavailableAttr>() &&
(FD->getType().getObjCLifetime() != Qualifiers::OCL_Strong ||
!Context.hasDirectOwnershipQualifier(FD->getType()))) {
// For backward compatibility, fields of C unions declared in system
// headers that have non-trivial ObjC ownership qualifications are marked
// as unavailable unless the qualifier is explicit and __strong. This can
// break ABI compatibility between programs compiled with ARC and MRR, but
// is a better option than rejecting programs using those unions under
// ARC.
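// Illustrative example (assumption, not from the original source), for a C
// union in a system header compiled under ARC:
//   union U { id obj; };           // 'obj' is implicitly marked unavailable
//   union V { __strong id obj; };  // explicit __strong: left usable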
FD->addAttr(UnavailableAttr::CreateImplicit(
Context, "", UnavailableAttr::IR_ARCFieldWithOwnership,
FD->getLocation()));
} else if (getLangOpts().ObjC &&
getLangOpts().getGC() != LangOptions::NonGC && Record &&
!Record->hasObjectMember()) {
if (FD->getType()->isObjCObjectPointerType() ||
FD->getType().isObjCGCStrong())
Record->setHasObjectMember(true);
else if (Context.getAsArrayType(FD->getType())) {
QualType BaseType = Context.getBaseElementType(FD->getType());
if (BaseType->isRecordType() &&
BaseType->castAs<RecordType>()->getDecl()->hasObjectMember())
Record->setHasObjectMember(true);
else if (BaseType->isObjCObjectPointerType() ||
BaseType.isObjCGCStrong())
Record->setHasObjectMember(true);
}
}
if (Record && !getLangOpts().CPlusPlus &&
!shouldIgnoreForRecordTriviality(FD)) {
QualType FT = FD->getType();
if (FT.isNonTrivialToPrimitiveDefaultInitialize()) {
Record->setNonTrivialToPrimitiveDefaultInitialize(true);
if (FT.hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
Record->isUnion())
Record->setHasNonTrivialToPrimitiveDefaultInitializeCUnion(true);
}
QualType::PrimitiveCopyKind PCK = FT.isNonTrivialToPrimitiveCopy();
if (PCK != QualType::PCK_Trivial && PCK != QualType::PCK_VolatileTrivial) {
Record->setNonTrivialToPrimitiveCopy(true);
if (FT.hasNonTrivialToPrimitiveCopyCUnion() || Record->isUnion())
Record->setHasNonTrivialToPrimitiveCopyCUnion(true);
}
if (FT.isDestructedType()) {
Record->setNonTrivialToPrimitiveDestroy(true);
Record->setParamDestroyedInCallee(true);
if (FT.hasNonTrivialToPrimitiveDestructCUnion() || Record->isUnion())
Record->setHasNonTrivialToPrimitiveDestructCUnion(true);
}
if (const auto *RT = FT->getAs<RecordType>()) {
if (RT->getDecl()->getArgPassingRestrictions() ==
RecordDecl::APK_CanNeverPassInRegs)
Record->setArgPassingRestrictions(RecordDecl::APK_CanNeverPassInRegs);
} else if (FT.getQualifiers().getObjCLifetime() == Qualifiers::OCL_Weak)
Record->setArgPassingRestrictions(RecordDecl::APK_CanNeverPassInRegs);
}
if (Record && FD->getType().isVolatileQualified())
Record->setHasVolatileMember(true);
// Keep track of the number of named members.
if (FD->getIdentifier())
++NumNamedMembers;
}
// Okay, we successfully defined 'Record'.
if (Record) {
bool Completed = false;
if (CXXRecord) {
if (!CXXRecord->isInvalidDecl()) {
// Set access bits correctly on the directly-declared conversions.
for (CXXRecordDecl::conversion_iterator
I = CXXRecord->conversion_begin(),
E = CXXRecord->conversion_end(); I != E; ++I)
I.setAccess((*I)->getAccess());
}
// Add any implicitly-declared members to this class.
AddImplicitlyDeclaredMembersToClass(CXXRecord);
if (!CXXRecord->isDependentType()) {
if (!CXXRecord->isInvalidDecl()) {
// If we have virtual base classes, we may end up finding multiple
// final overriders for a given virtual function. Check for this
// problem now.
if (CXXRecord->getNumVBases()) {
CXXFinalOverriderMap FinalOverriders;
CXXRecord->getFinalOverriders(FinalOverriders);
for (CXXFinalOverriderMap::iterator M = FinalOverriders.begin(),
MEnd = FinalOverriders.end();
M != MEnd; ++M) {
for (OverridingMethods::iterator SO = M->second.begin(),
SOEnd = M->second.end();
SO != SOEnd; ++SO) {
assert(SO->second.size() > 0 &&
"Virtual function without overriding functions?");
if (SO->second.size() == 1)
continue;
// C++ [class.virtual]p2:
// In a derived class, if a virtual member function of a base
// class subobject has more than one final overrider the
// program is ill-formed.
Diag(Record->getLocation(), diag::err_multiple_final_overriders)
<< (const NamedDecl *)M->first << Record;
Diag(M->first->getLocation(),
diag::note_overridden_virtual_function);
for (OverridingMethods::overriding_iterator
OM = SO->second.begin(),
OMEnd = SO->second.end();
OM != OMEnd; ++OM)
Diag(OM->Method->getLocation(), diag::note_final_overrider)
<< (const NamedDecl *)M->first << OM->Method->getParent();
Record->setInvalidDecl();
}
}
CXXRecord->completeDefinition(&FinalOverriders);
Completed = true;
}
}
ComputeSelectedDestructor(*this, CXXRecord);
ComputeSpecialMemberFunctionsEligiblity(*this, CXXRecord);
}
}
if (!Completed)
Record->completeDefinition();
// Handle attributes before checking the layout.
ProcessDeclAttributeList(S, Record, Attrs);
// Check to see if a FieldDecl is a pointer to a function.
auto IsFunctionPointerOrForwardDecl = [&](const Decl *D) {
const FieldDecl *FD = dyn_cast<FieldDecl>(D);
if (!FD) {
// Check whether this is a forward declaration that was inserted by
// Clang. This happens when a non-forward declared / defined type is
// used, e.g.:
//
// struct foo {
// struct bar *(*f)();
// struct bar *(*g)();
// };
//
// "struct bar" shows up in the decl AST as a "RecordDecl" with an
// incomplete definition.
if (const auto *TD = dyn_cast<TagDecl>(D))
return !TD->isCompleteDefinition();
return false;
}
QualType FieldType = FD->getType().getDesugaredType(Context);
if (isa<PointerType>(FieldType)) {
QualType PointeeType = cast<PointerType>(FieldType)->getPointeeType();
return PointeeType.getDesugaredType(Context)->isFunctionType();
}
return false;
};
// Maybe randomize the record's decls. We automatically randomize a record
// of function pointers, unless it has the "no_randomize_layout" attribute.
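// Illustrative example (assumption; randomization also requires a
// -frandomize-layout-seed= value):
//   struct ops { int (*open)(void); int (*close)(void); };  // randomized
//   struct fixed { int (*f)(void); } __attribute__((no_randomize_layout));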
if (!getLangOpts().CPlusPlus &&
(Record->hasAttr<RandomizeLayoutAttr>() ||
(!Record->hasAttr<NoRandomizeLayoutAttr>() &&
llvm::all_of(Record->decls(), IsFunctionPointerOrForwardDecl))) &&
!Record->isUnion() && !getLangOpts().RandstructSeed.empty() &&
!Record->isRandomized()) {
SmallVector<Decl *, 32> NewDeclOrdering;
if (randstruct::randomizeStructureLayout(Context, Record,
NewDeclOrdering))
Record->reorderDecls(NewDeclOrdering);
}
// We may have deferred checking for a deleted destructor. Check now.
if (CXXRecord) {
auto *Dtor = CXXRecord->getDestructor();
if (Dtor && Dtor->isImplicit() &&
ShouldDeleteSpecialMember(Dtor, CXXDestructor)) {
CXXRecord->setImplicitDestructorIsDeleted();
SetDeclDeleted(Dtor, CXXRecord->getLocation());
}
}
if (Record->hasAttrs()) {
CheckAlignasUnderalignment(Record);
if (const MSInheritanceAttr *IA = Record->getAttr<MSInheritanceAttr>())
checkMSInheritanceAttrOnDefinition(cast<CXXRecordDecl>(Record),
IA->getRange(), IA->getBestCase(),
IA->getInheritanceModel());
}
// Check if the structure/union declaration is a type that can have zero
// size in C. For C this is a language extension; for C++ it may cause
// compatibility problems.
bool CheckForZeroSize;
if (!getLangOpts().CPlusPlus) {
CheckForZeroSize = true;
} else {
// For C++ filter out types that cannot be referenced in C code.
CXXRecordDecl *CXXRecord = cast<CXXRecordDecl>(Record);
CheckForZeroSize =
CXXRecord->getLexicalDeclContext()->isExternCContext() &&
!CXXRecord->isDependentType() && !inTemplateInstantiation() &&
CXXRecord->isCLike();
}
if (CheckForZeroSize) {
bool ZeroSize = true;
bool IsEmpty = true;
unsigned NonBitFields = 0;
for (RecordDecl::field_iterator I = Record->field_begin(),
E = Record->field_end();
(NonBitFields == 0 || ZeroSize) && I != E; ++I) {
IsEmpty = false;
if (I->isUnnamedBitfield()) {
if (!I->isZeroLengthBitField(Context))
ZeroSize = false;
} else {
++NonBitFields;
QualType FieldType = I->getType();
if (FieldType->isIncompleteType() ||
!Context.getTypeSizeInChars(FieldType).isZero())
ZeroSize = false;
}
}
// Empty structs are an extension in C (C99 6.7.2.1p7). They are
// allowed in C++, but we warn if the declaration is inside an
// extern "C" block.
if (ZeroSize) {
Diag(RecLoc, getLangOpts().CPlusPlus ?
diag::warn_zero_size_struct_union_in_extern_c :
diag::warn_zero_size_struct_union_compat)
<< IsEmpty << Record->isUnion() << (NonBitFields > 1);
}
// Structs without named members are an extension in C (C99 6.7.2.1p7),
// but are accepted by GCC.
if (NonBitFields == 0 && !getLangOpts().CPlusPlus) {
Diag(RecLoc, IsEmpty ? diag::ext_empty_struct_union :
diag::ext_no_named_members_in_struct_union)
<< Record->isUnion();
}
}
} else {
ObjCIvarDecl **ClsFields =
reinterpret_cast<ObjCIvarDecl**>(RecFields.data());
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(EnclosingDecl)) {
ID->setEndOfDefinitionLoc(RBrac);
// Add ivars to the class's DeclContext.
for (unsigned i = 0, e = RecFields.size(); i != e; ++i) {
ClsFields[i]->setLexicalDeclContext(ID);
ID->addDecl(ClsFields[i]);
}
// Must enforce the rule that ivars in the base classes may not be
// duplicates.
if (ID->getSuperClass())
DiagnoseDuplicateIvars(ID, ID->getSuperClass());
} else if (ObjCImplementationDecl *IMPDecl =
dyn_cast<ObjCImplementationDecl>(EnclosingDecl)) {
assert(IMPDecl && "ActOnFields - missing ObjCImplementationDecl");
for (unsigned I = 0, N = RecFields.size(); I != N; ++I)
// An ivar declared in an @implementation never belongs to the
// implementation; it is merely in the implementation's lexical context.
ClsFields[I]->setLexicalDeclContext(IMPDecl);
CheckImplementationIvars(IMPDecl, ClsFields, RecFields.size(), RBrac);
IMPDecl->setIvarLBraceLoc(LBrac);
IMPDecl->setIvarRBraceLoc(RBrac);
} else if (ObjCCategoryDecl *CDecl =
dyn_cast<ObjCCategoryDecl>(EnclosingDecl)) {
// Case of ivars in a class extension; all other cases have been
// reported as errors elsewhere.
// FIXME. Class extension does not have a LocEnd field.
// CDecl->setLocEnd(RBrac);
// Add ivars to the class extension's DeclContext.
// Diagnose redeclaration of private ivars.
ObjCInterfaceDecl *IDecl = CDecl->getClassInterface();
for (unsigned i = 0, e = RecFields.size(); i != e; ++i) {
if (IDecl) {
if (const ObjCIvarDecl *ClsIvar =
IDecl->getIvarDecl(ClsFields[i]->getIdentifier())) {
Diag(ClsFields[i]->getLocation(),
diag::err_duplicate_ivar_declaration);
Diag(ClsIvar->getLocation(), diag::note_previous_definition);
continue;
}
for (const auto *Ext : IDecl->known_extensions()) {
if (const ObjCIvarDecl *ClsExtIvar
= Ext->getIvarDecl(ClsFields[i]->getIdentifier())) {
Diag(ClsFields[i]->getLocation(),
diag::err_duplicate_ivar_declaration);
Diag(ClsExtIvar->getLocation(), diag::note_previous_definition);
continue;
}
}
}
ClsFields[i]->setLexicalDeclContext(CDecl);
CDecl->addDecl(ClsFields[i]);
}
CDecl->setIvarLBraceLoc(LBrac);
CDecl->setIvarRBraceLoc(RBrac);
}
}
}
/// Determine whether the given integral value is representable within
/// the given type T.
static bool isRepresentableIntegerValue(ASTContext &Context,
llvm::APSInt &Value,
QualType T) {
assert((T->isIntegralType(Context) || T->isEnumeralType()) &&
"Integral type required!");
unsigned BitWidth = Context.getIntWidth(T);
if (Value.isUnsigned() || Value.isNonNegative()) {
if (T->isSignedIntegerOrEnumerationType())
--BitWidth;
return Value.getActiveBits() <= BitWidth;
}
return Value.getMinSignedBits() <= BitWidth;
}
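// Worked example (illustrative): for T = 'signed char' (BitWidth 8, reduced
// to 7 for the sign bit), 127 has 7 active bits and fits, while 128 has 8 and
// does not; the negative value -128 has getMinSignedBits() == 8 <= 8, so it
// fits.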
// Given an integral type, return the next larger integral type
// (or a null type if no such type exists).
static QualType getNextLargerIntegralType(ASTContext &Context, QualType T) {
// FIXME: Int128/UInt128 support, which also needs to be introduced into
// enum checking below.
assert((T->isIntegralType(Context) ||
T->isEnumeralType()) && "Integral type required!");
const unsigned NumTypes = 4;
QualType SignedIntegralTypes[NumTypes] = {
Context.ShortTy, Context.IntTy, Context.LongTy, Context.LongLongTy
};
QualType UnsignedIntegralTypes[NumTypes] = {
Context.UnsignedShortTy, Context.UnsignedIntTy, Context.UnsignedLongTy,
Context.UnsignedLongLongTy
};
unsigned BitWidth = Context.getTypeSize(T);
QualType *Types = T->isSignedIntegerOrEnumerationType()? SignedIntegralTypes
: UnsignedIntegralTypes;
for (unsigned I = 0; I != NumTypes; ++I)
if (Context.getTypeSize(Types[I]) > BitWidth)
return Types[I];
return QualType();
}
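// Usage sketch (illustrative): on a typical target with 32-bit int and 64-bit
// long, getNextLargerIntegralType(Context, Context.IntTy) yields 'long';
// for an unsigned 64-bit type it yields a null QualType, as no larger
// integral type is available.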
EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum,
EnumConstantDecl *LastEnumConst,
SourceLocation IdLoc,
IdentifierInfo *Id,
Expr *Val) {
unsigned IntWidth = Context.getTargetInfo().getIntWidth();
llvm::APSInt EnumVal(IntWidth);
QualType EltTy;
if (Val && DiagnoseUnexpandedParameterPack(Val, UPPC_EnumeratorValue))
Val = nullptr;
if (Val)
Val = DefaultLvalueConversion(Val).get();
if (Val) {
if (Enum->isDependentType() || Val->isTypeDependent() ||
Val->containsErrors())
EltTy = Context.DependentTy;
else {
// FIXME: We don't allow folding in C++11 mode for an enum with a fixed
// underlying type, but do allow it in all other contexts.
if (getLangOpts().CPlusPlus11 && Enum->isFixed()) {
// C++11 [dcl.enum]p5: If the underlying type is fixed, [...] the
// constant-expression in the enumerator-definition shall be a converted
// constant expression of the underlying type.
EltTy = Enum->getIntegerType();
ExprResult Converted =
CheckConvertedConstantExpression(Val, EltTy, EnumVal,
CCEK_Enumerator);
if (Converted.isInvalid())
Val = nullptr;
else
Val = Converted.get();
} else if (!Val->isValueDependent() &&
!(Val =
VerifyIntegerConstantExpression(Val, &EnumVal, AllowFold)
.get())) {
// C99 6.7.2.2p2: Make sure we have an integer constant expression.
} else {
if (Enum->isComplete()) {
EltTy = Enum->getIntegerType();
// In Obj-C and Microsoft mode, require the enumeration value to be
// representable in the underlying type of the enumeration. In C++11,
// we perform a non-narrowing conversion as part of converted constant
// expression checking.
if (!isRepresentableIntegerValue(Context, EnumVal, EltTy)) {
if (Context.getTargetInfo()
.getTriple()
.isWindowsMSVCEnvironment()) {
Diag(IdLoc, diag::ext_enumerator_too_large) << EltTy;
} else {
Diag(IdLoc, diag::err_enumerator_too_large) << EltTy;
}
}
// Cast to the underlying type.
Val = ImpCastExprToType(Val, EltTy,
EltTy->isBooleanType() ? CK_IntegralToBoolean
: CK_IntegralCast)
.get();
} else if (getLangOpts().CPlusPlus) {
// C++11 [dcl.enum]p5:
// If the underlying type is not fixed, the type of each enumerator
// is the type of its initializing value:
// - If an initializer is specified for an enumerator, the
// initializing value has the same type as the expression.
EltTy = Val->getType();
} else {
// C99 6.7.2.2p2:
// The expression that defines the value of an enumeration constant
// shall be an integer constant expression that has a value
// representable as an int.
// Complain if the value is not representable in an int.
if (!isRepresentableIntegerValue(Context, EnumVal, Context.IntTy))
Diag(IdLoc, diag::ext_enum_value_not_int)
<< toString(EnumVal, 10) << Val->getSourceRange()
<< (EnumVal.isUnsigned() || EnumVal.isNonNegative());
else if (!Context.hasSameType(Val->getType(), Context.IntTy)) {
// Force the type of the expression to 'int'.
Val = ImpCastExprToType(Val, Context.IntTy, CK_IntegralCast).get();
}
EltTy = Val->getType();
}
}
}
}
if (!Val) {
if (Enum->isDependentType())
EltTy = Context.DependentTy;
else if (!LastEnumConst) {
// C++0x [dcl.enum]p5:
// If the underlying type is not fixed, the type of each enumerator
// is the type of its initializing value:
// - If no initializer is specified for the first enumerator, the
// initializing value has an unspecified integral type.
//
// GCC uses 'int' for its unspecified integral type, as does
// C99 6.7.2.2p3.
if (Enum->isFixed()) {
EltTy = Enum->getIntegerType();
}
else {
EltTy = Context.IntTy;
}
} else {
// Assign the last value + 1.
EnumVal = LastEnumConst->getInitVal();
++EnumVal;
EltTy = LastEnumConst->getType();
// Check for overflow on increment.
if (EnumVal < LastEnumConst->getInitVal()) {
// C++0x [dcl.enum]p5:
// If the underlying type is not fixed, the type of each enumerator
// is the type of its initializing value:
//
// - Otherwise the type of the initializing value is the same as
// the type of the initializing value of the preceding enumerator
// unless the incremented value is not representable in that type,
// in which case the type is an unspecified integral type
// sufficient to contain the incremented value. If no such type
// exists, the program is ill-formed.
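//
// Illustrative example (assuming 32-bit int and 64-bit long):
//   enum E { A = 2147483647, B };  // A + 1 overflows 'int', so B's
//                                  // type becomes 'long'.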
QualType T = getNextLargerIntegralType(Context, EltTy);
if (T.isNull() || Enum->isFixed()) {
// There is no integral type large enough to represent this
// value. Complain, then allow the value to wrap around.
EnumVal = LastEnumConst->getInitVal();
EnumVal = EnumVal.zext(EnumVal.getBitWidth() * 2);
++EnumVal;
if (Enum->isFixed())
// When the underlying type is fixed, this is ill-formed.
Diag(IdLoc, diag::err_enumerator_wrapped)
<< toString(EnumVal, 10)
<< EltTy;
else
Diag(IdLoc, diag::ext_enumerator_increment_too_large)
<< toString(EnumVal, 10);
} else {
EltTy = T;
}
// Retrieve the last enumerator's value, extend that type to the
// type that is supposed to be large enough to represent the incremented
// value, then increment.
EnumVal = LastEnumConst->getInitVal();
EnumVal.setIsSigned(EltTy->isSignedIntegerOrEnumerationType());
EnumVal = EnumVal.zextOrTrunc(Context.getIntWidth(EltTy));
++EnumVal;
// If we're not in C++, diagnose the overflow of enumerator values,
// which in C99 means that the enumerator value is not representable in
// an int (C99 6.7.2.2p2). However, we support GCC's extension that
// permits enumerator values that are representable in some larger
// integral type.
if (!getLangOpts().CPlusPlus && !T.isNull())
Diag(IdLoc, diag::warn_enum_value_overflow);
} else if (!getLangOpts().CPlusPlus &&
!isRepresentableIntegerValue(Context, EnumVal, EltTy)) {
// Enforce C99 6.7.2.2p2 even when we compute the next value.
Diag(IdLoc, diag::ext_enum_value_not_int)
<< toString(EnumVal, 10) << 1;
}
}
}
if (!EltTy->isDependentType()) {
// Make the enumerator value match the signedness and size of the
// enumerator's type.
EnumVal = EnumVal.extOrTrunc(Context.getIntWidth(EltTy));
EnumVal.setIsSigned(EltTy->isSignedIntegerOrEnumerationType());
}
return EnumConstantDecl::Create(Context, Enum, IdLoc, Id, EltTy,
Val, EnumVal);
}
Sema::SkipBodyInfo Sema::shouldSkipAnonEnumBody(Scope *S, IdentifierInfo *II,
SourceLocation IILoc) {
if (!(getLangOpts().Modules || getLangOpts().ModulesLocalVisibility) ||
!getLangOpts().CPlusPlus)
return SkipBodyInfo();
// We have an anonymous enum definition. Look up the first enumerator to
// determine if we should merge the definition with an existing one and
// skip the body.
NamedDecl *PrevDecl = LookupSingleName(S, II, IILoc, LookupOrdinaryName,
forRedeclarationInCurContext());
auto *PrevECD = dyn_cast_or_null<EnumConstantDecl>(PrevDecl);
if (!PrevECD)
return SkipBodyInfo();
EnumDecl *PrevED = cast<EnumDecl>(PrevECD->getDeclContext());
NamedDecl *Hidden;
if (!PrevED->getDeclName() && !hasVisibleDefinition(PrevED, &Hidden)) {
SkipBodyInfo Skip;
Skip.Previous = Hidden;
return Skip;
}
return SkipBodyInfo();
}
Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst,
SourceLocation IdLoc, IdentifierInfo *Id,
const ParsedAttributesView &Attrs,
SourceLocation EqualLoc, Expr *Val) {
EnumDecl *TheEnumDecl = cast<EnumDecl>(theEnumDecl);
EnumConstantDecl *LastEnumConst =
cast_or_null<EnumConstantDecl>(lastEnumConst);
// The scope passed in may not be a decl scope. Zip up the scope tree until
// we find one that is.
S = getNonFieldDeclScope(S);
// Verify that there isn't already something declared with this name in this
// scope.
LookupResult R(*this, Id, IdLoc, LookupOrdinaryName, ForVisibleRedeclaration);
LookupName(R, S);
NamedDecl *PrevDecl = R.getAsSingle<NamedDecl>();
if (PrevDecl && PrevDecl->isTemplateParameter()) {
// Maybe we will complain about the shadowed template parameter.
DiagnoseTemplateParameterShadow(IdLoc, PrevDecl);
// Just pretend that we didn't see the previous declaration.
PrevDecl = nullptr;
}
// C++ [class.mem]p15:
// If T is the name of a class, then each of the following shall have a name
// different from T:
// - every enumerator of every member of class T that is an unscoped
// enumerated type
if (getLangOpts().CPlusPlus && !TheEnumDecl->isScoped())
DiagnoseClassNameShadow(TheEnumDecl->getDeclContext(),
DeclarationNameInfo(Id, IdLoc));
EnumConstantDecl *New =
CheckEnumConstant(TheEnumDecl, LastEnumConst, IdLoc, Id, Val);
if (!New)
return nullptr;
if (PrevDecl) {
if (!TheEnumDecl->isScoped() && isa<ValueDecl>(PrevDecl)) {
// Check for other kinds of shadowing not already handled.
CheckShadow(New, PrevDecl, R);
}
// When in C++, we may get a TagDecl with the same name; in this case the
// enum constant will 'hide' the tag.
assert((getLangOpts().CPlusPlus || !isa<TagDecl>(PrevDecl)) &&
"Received TagDecl when not in C++!");
if (!isa<TagDecl>(PrevDecl) && isDeclInScope(PrevDecl, CurContext, S)) {
if (isa<EnumConstantDecl>(PrevDecl))
Diag(IdLoc, diag::err_redefinition_of_enumerator) << Id;
else
Diag(IdLoc, diag::err_redefinition) << Id;
notePreviousDefinition(PrevDecl, IdLoc);
return nullptr;
}
}
// Process attributes.
ProcessDeclAttributeList(S, New, Attrs);
AddPragmaAttributes(S, New);
// Register this decl in the current scope stack.
New->setAccess(TheEnumDecl->getAccess());
PushOnScopeChains(New, S);
ActOnDocumentableDecl(New);
return New;
}
// Returns true when the enumerator's initializing expression does not trigger
// duplicate enum warning. A few common cases are exempted as follows:
// Element2 = Element1
// Element2 = Element1 + 1
// Element2 = Element1 - 1
// Where Element2 and Element1 are from the same enum.
static bool ValidDuplicateEnum(EnumConstantDecl *ECD, EnumDecl *Enum) {
Expr *InitExpr = ECD->getInitExpr();
if (!InitExpr)
return true;
InitExpr = InitExpr->IgnoreImpCasts();
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(InitExpr)) {
if (!BO->isAdditiveOp())
return true;
IntegerLiteral *IL = dyn_cast<IntegerLiteral>(BO->getRHS());
if (!IL)
return true;
if (IL->getValue() != 1)
return true;
InitExpr = BO->getLHS();
}
// This checks if the elements are from the same enum.
DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(InitExpr);
if (!DRE)
return true;
EnumConstantDecl *EnumConstant = dyn_cast<EnumConstantDecl>(DRE->getDecl());
if (!EnumConstant)
return true;
if (cast<EnumDecl>(TagDecl::castFromDeclContext(ECD->getDeclContext())) !=
Enum)
return true;
return false;
}
// Emits a warning when an element is implicitly set to a value that
// a previous element has already been set to.
static void CheckForDuplicateEnumValues(Sema &S, ArrayRef<Decl *> Elements,
EnumDecl *Enum, QualType EnumType) {
// Avoid anonymous enums
if (!Enum->getIdentifier())
return;
// Only check for small enums.
if (Enum->getNumPositiveBits() > 63 || Enum->getNumNegativeBits() > 64)
return;
if (S.Diags.isIgnored(diag::warn_duplicate_enum_values, Enum->getLocation()))
return;
typedef SmallVector<EnumConstantDecl *, 3> ECDVector;
typedef SmallVector<std::unique_ptr<ECDVector>, 3> DuplicatesVector;
typedef llvm::PointerUnion<EnumConstantDecl*, ECDVector*> DeclOrVector;
// DenseMaps cannot contain the all-ones int64_t value, so use unordered_map.
typedef std::unordered_map<int64_t, DeclOrVector> ValueToVectorMap;
// Use int64_t as a key to avoid needing special handling for map keys.
auto EnumConstantToKey = [](const EnumConstantDecl *D) {
llvm::APSInt Val = D->getInitVal();
return Val.isSigned() ? Val.getSExtValue() : Val.getZExtValue();
};
DuplicatesVector DupVector;
ValueToVectorMap EnumMap;
// Populate the EnumMap with all values represented by enum constants without
// an initializer.
for (auto *Element : Elements) {
EnumConstantDecl *ECD = cast_or_null<EnumConstantDecl>(Element);
// Null EnumConstantDecl means a previous diagnostic has been emitted for
// this constant. Skip this enum since it may be ill-formed.
if (!ECD) {
return;
}
// Constants with initializers are handled in the next loop.
if (ECD->getInitExpr())
continue;
// Duplicate values are handled in the next loop.
EnumMap.insert({EnumConstantToKey(ECD), ECD});
}
if (EnumMap.size() == 0)
return;
// Create vectors for any values that have duplicates.
for (auto *Element : Elements) {
// The last loop returned if any constant was null.
EnumConstantDecl *ECD = cast<EnumConstantDecl>(Element);
if (!ValidDuplicateEnum(ECD, Enum))
continue;
auto Iter = EnumMap.find(EnumConstantToKey(ECD));
if (Iter == EnumMap.end())
continue;
DeclOrVector& Entry = Iter->second;
if (EnumConstantDecl *D = Entry.dyn_cast<EnumConstantDecl*>()) {
// Ensure constants are different.
if (D == ECD)
continue;
// Create new vector and push values onto it.
auto Vec = std::make_unique<ECDVector>();
Vec->push_back(D);
Vec->push_back(ECD);
// Update entry to point to the duplicates vector.
Entry = Vec.get();
// Store the vector somewhere we can consult later for quick emission of
// diagnostics.
DupVector.emplace_back(std::move(Vec));
continue;
}
ECDVector *Vec = Entry.get<ECDVector*>();
// Make sure constants are not added more than once.
if (*Vec->begin() == ECD)
continue;
Vec->push_back(ECD);
}
// Emit diagnostics.
for (const auto &Vec : DupVector) {
assert(Vec->size() > 1 && "ECDVector should have at least 2 elements.");
// Emit warning for one enum constant.
auto *FirstECD = Vec->front();
S.Diag(FirstECD->getLocation(), diag::warn_duplicate_enum_values)
<< FirstECD << toString(FirstECD->getInitVal(), 10)
<< FirstECD->getSourceRange();
// Emit one note for each of the remaining enum constants with
// the same value.
for (auto *ECD : llvm::drop_begin(*Vec))
S.Diag(ECD->getLocation(), diag::note_duplicate_element)
<< ECD << toString(ECD->getInitVal(), 10)
<< ECD->getSourceRange();
}
}
bool Sema::IsValueInFlagEnum(const EnumDecl *ED, const llvm::APInt &Val,
bool AllowMask) const {
assert(ED->isClosedFlag() && "looking for value in non-flag or open enum");
assert(ED->isCompleteDefinition() && "expected enum definition");
auto R = FlagBitsCache.insert(std::make_pair(ED, llvm::APInt()));
llvm::APInt &FlagBits = R.first->second;
if (R.second) {
for (auto *E : ED->enumerators()) {
const auto &EVal = E->getInitVal();
// Only single-bit enumerators introduce new flag values.
if (EVal.isPowerOf2())
FlagBits = FlagBits.zext(EVal.getBitWidth()) | EVal;
}
}
// A value is in a flag enum if either its bits are a subset of the enum's
// flag bits (the first condition) or we are allowing masks and the same is
// true of its complement (the second condition). When masks are allowed, we
// allow the common idiom of ~(enum1 | enum2) to be a valid enum value.
//
// While it's true that any value could be used as a mask, the assumption is
// that a mask will have all of the insignificant bits set. Anything else is
// likely a logic error.
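//
// Illustrative example (assumption, not from the original source): given
//   enum __attribute__((flag_enum, enum_extensibility(closed)))
//       F { A = 1, B = 2, C = 4 };
// FlagBits is 0b111, so Val = 3 (A|B) is accepted, Val = 8 is rejected, and
// with AllowMask the complement-style mask ~(A|B) is also accepted.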
llvm::APInt FlagMask = ~FlagBits.zextOrTrunc(Val.getBitWidth());
return !(FlagMask & Val) || (AllowMask && !(FlagMask & ~Val));
}
void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange,
Decl *EnumDeclX, ArrayRef<Decl *> Elements, Scope *S,
const ParsedAttributesView &Attrs) {
EnumDecl *Enum = cast<EnumDecl>(EnumDeclX);
QualType EnumType = Context.getTypeDeclType(Enum);
ProcessDeclAttributeList(S, Enum, Attrs);
if (Enum->isDependentType()) {
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
EnumConstantDecl *ECD =
cast_or_null<EnumConstantDecl>(Elements[i]);
if (!ECD) continue;
ECD->setType(EnumType);
}
Enum->completeDefinition(Context.DependentTy, Context.DependentTy, 0, 0);
return;
}
// TODO: If the result value doesn't fit in an int, it must be a long or long
// long value. ISO C does not support this, but GCC does as an extension;
// emit a warning.
unsigned IntWidth = Context.getTargetInfo().getIntWidth();
unsigned CharWidth = Context.getTargetInfo().getCharWidth();
unsigned ShortWidth = Context.getTargetInfo().getShortWidth();
// Verify that all the values are okay, compute the size of the values, and
// reverse the list.
unsigned NumNegativeBits = 0;
unsigned NumPositiveBits = 0;
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
EnumConstantDecl *ECD =
cast_or_null<EnumConstantDecl>(Elements[i]);
if (!ECD) continue; // Already issued a diagnostic.
const llvm::APSInt &InitVal = ECD->getInitVal();
// Keep track of the size of positive and negative values.
if (InitVal.isUnsigned() || InitVal.isNonNegative()) {
// If the enumerator is zero that should still be counted as a positive
// bit since we need a bit to store the value zero.
unsigned ActiveBits = InitVal.getActiveBits();
NumPositiveBits = std::max({NumPositiveBits, ActiveBits, 1u});
} else {
NumNegativeBits = std::max(NumNegativeBits,
(unsigned)InitVal.getMinSignedBits());
}
}
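// Worked example (illustrative): enum { X = 3, Y = -2 } yields
// NumPositiveBits = 2 (3 == 0b11) and NumNegativeBits = 2 (-2 fits in two
// bits of two's complement, 0b10).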
// If we have an empty set of enumerators we still need one bit.
// From [dcl.enum]p8
// If the enumerator-list is empty, the values of the enumeration are as if
// the enumeration had a single enumerator with value 0
if (!NumPositiveBits && !NumNegativeBits)
NumPositiveBits = 1;
// Figure out the type that should be used for this enum.
QualType BestType;
unsigned BestWidth;
// C++0x N3000 [conv.prom]p3:
// An rvalue of an unscoped enumeration type whose underlying
// type is not fixed can be converted to an rvalue of the first
// of the following types that can represent all the values of
// the enumeration: int, unsigned int, long int, unsigned long
// int, long long int, or unsigned long long int.
// C99 6.4.4.3p2:
// An identifier declared as an enumeration constant has type int.
// The C99 rule is modified by a GCC extension.
QualType BestPromotionType;
bool Packed = Enum->hasAttr<PackedAttr>();
// -fshort-enums is equivalent to specifying the packed attribute on all
// enum definitions.
if (LangOpts.ShortEnums)
Packed = true;
// If the enum already has a type because it is fixed or dictated by the
// target, promote that type instead of analyzing the enumerators.
if (Enum->isComplete()) {
BestType = Enum->getIntegerType();
if (Context.isPromotableIntegerType(BestType))
BestPromotionType = Context.getPromotedIntegerType(BestType);
else
BestPromotionType = BestType;
BestWidth = Context.getIntWidth(BestType);
}
else if (NumNegativeBits) {
// If there is a negative value, figure out the smallest integer type (of
// int/long/longlong) that fits.
// If it's packed, check also if it fits a char or a short.
if (Packed && NumNegativeBits <= CharWidth && NumPositiveBits < CharWidth) {
BestType = Context.SignedCharTy;
BestWidth = CharWidth;
} else if (Packed && NumNegativeBits <= ShortWidth &&
NumPositiveBits < ShortWidth) {
BestType = Context.ShortTy;
BestWidth = ShortWidth;
} else if (NumNegativeBits <= IntWidth && NumPositiveBits < IntWidth) {
BestType = Context.IntTy;
BestWidth = IntWidth;
} else {
BestWidth = Context.getTargetInfo().getLongWidth();
if (NumNegativeBits <= BestWidth && NumPositiveBits < BestWidth) {
BestType = Context.LongTy;
} else {
BestWidth = Context.getTargetInfo().getLongLongWidth();
if (NumNegativeBits > BestWidth || NumPositiveBits >= BestWidth)
Diag(Enum->getLocation(), diag::ext_enum_too_large);
BestType = Context.LongLongTy;
}
}
BestPromotionType = (BestWidth <= IntWidth ? Context.IntTy : BestType);
} else {
// If there is no negative value, figure out the smallest type that fits
// all of the enumerator values.
// If it's packed, check also if it fits a char or a short.
if (Packed && NumPositiveBits <= CharWidth) {
BestType = Context.UnsignedCharTy;
BestPromotionType = Context.IntTy;
BestWidth = CharWidth;
} else if (Packed && NumPositiveBits <= ShortWidth) {
BestType = Context.UnsignedShortTy;
BestPromotionType = Context.IntTy;
BestWidth = ShortWidth;
} else if (NumPositiveBits <= IntWidth) {
BestType = Context.UnsignedIntTy;
BestWidth = IntWidth;
BestPromotionType
= (NumPositiveBits == BestWidth || !getLangOpts().CPlusPlus)
? Context.UnsignedIntTy : Context.IntTy;
} else if (NumPositiveBits <=
(BestWidth = Context.getTargetInfo().getLongWidth())) {
BestType = Context.UnsignedLongTy;
BestPromotionType
= (NumPositiveBits == BestWidth || !getLangOpts().CPlusPlus)
? Context.UnsignedLongTy : Context.LongTy;
} else {
BestWidth = Context.getTargetInfo().getLongLongWidth();
assert(NumPositiveBits <= BestWidth &&
"How could an initializer get larger than ULL?");
BestType = Context.UnsignedLongLongTy;
BestPromotionType
= (NumPositiveBits == BestWidth || !getLangOpts().CPlusPlus)
? Context.UnsignedLongLongTy : Context.LongLongTy;
}
}
// Loop over all of the enumerator constants, changing their types to match
// the type of the enum if needed.
for (auto *D : Elements) {
auto *ECD = cast_or_null<EnumConstantDecl>(D);
if (!ECD) continue; // Already issued a diagnostic.
// Standard C says the enumerators have int type, but we allow, as an
// extension, the enumerators to be larger than int size. If each
// enumerator value fits in an int, type it as an int, otherwise type it the
// same as the enumerator decl itself. This means that in "enum { X = 1U }"
// that X has type 'int', not 'unsigned'.
// Determine whether the value fits into an int.
llvm::APSInt InitVal = ECD->getInitVal();
// If it fits into an integer type, force it. Otherwise force it to match
// the enum decl type.
QualType NewTy;
unsigned NewWidth;
bool NewSign;
if (!getLangOpts().CPlusPlus &&
!Enum->isFixed() &&
isRepresentableIntegerValue(Context, InitVal, Context.IntTy)) {
NewTy = Context.IntTy;
NewWidth = IntWidth;
NewSign = true;
} else if (ECD->getType() == BestType) {
// Already the right type!
if (getLangOpts().CPlusPlus)
// C++ [dcl.enum]p4: Following the closing brace of an
// enum-specifier, each enumerator has the type of its
// enumeration.
ECD->setType(EnumType);
continue;
} else {
NewTy = BestType;
NewWidth = BestWidth;
NewSign = BestType->isSignedIntegerOrEnumerationType();
}
// Adjust the APSInt value.
InitVal = InitVal.extOrTrunc(NewWidth);
InitVal.setIsSigned(NewSign);
ECD->setInitVal(InitVal);
// Adjust the Expr initializer and type.
if (ECD->getInitExpr() &&
!Context.hasSameType(NewTy, ECD->getInitExpr()->getType()))
ECD->setInitExpr(ImplicitCastExpr::Create(
Context, NewTy, CK_IntegralCast, ECD->getInitExpr(),
/*base paths*/ nullptr, VK_PRValue, FPOptionsOverride()));
if (getLangOpts().CPlusPlus)
// C++ [dcl.enum]p4: Following the closing brace of an
// enum-specifier, each enumerator has the type of its
// enumeration.
ECD->setType(EnumType);
else
ECD->setType(NewTy);
}
Enum->completeDefinition(BestType, BestPromotionType,
NumPositiveBits, NumNegativeBits);
CheckForDuplicateEnumValues(*this, Elements, Enum, EnumType);
if (Enum->isClosedFlag()) {
for (Decl *D : Elements) {
EnumConstantDecl *ECD = cast_or_null<EnumConstantDecl>(D);
if (!ECD) continue; // Already issued a diagnostic.
llvm::APSInt InitVal = ECD->getInitVal();
if (InitVal != 0 && !InitVal.isPowerOf2() &&
!IsValueInFlagEnum(Enum, InitVal, true))
Diag(ECD->getLocation(), diag::warn_flag_enum_constant_out_of_range)
<< ECD << Enum;
}
}
// Now that the enum type is defined, ensure it's not been underaligned.
if (Enum->hasAttrs())
CheckAlignasUnderalignment(Enum);
}
Decl *Sema::ActOnFileScopeAsmDecl(Expr *expr,
SourceLocation StartLoc,
SourceLocation EndLoc) {
StringLiteral *AsmString = cast<StringLiteral>(expr);
FileScopeAsmDecl *New = FileScopeAsmDecl::Create(Context, CurContext,
AsmString, StartLoc,
EndLoc);
CurContext->addDecl(New);
return New;
}
Decl *Sema::ActOnTopLevelStmtDecl(Stmt *Statement) {
auto *New = TopLevelStmtDecl::Create(Context, Statement);
Context.getTranslationUnitDecl()->addDecl(New);
return New;
}
void Sema::ActOnPragmaRedefineExtname(IdentifierInfo* Name,
IdentifierInfo* AliasName,
SourceLocation PragmaLoc,
SourceLocation NameLoc,
SourceLocation AliasNameLoc) {
NamedDecl *PrevDecl = LookupSingleName(TUScope, Name, NameLoc,
LookupOrdinaryName);
AttributeCommonInfo Info(AliasName, SourceRange(AliasNameLoc),
AttributeCommonInfo::AS_Pragma);
AsmLabelAttr *Attr = AsmLabelAttr::CreateImplicit(
Context, AliasName->getName(), /*IsLiteralLabel=*/true, Info);
// If a declaration that:
// 1) declares a function or a variable
// 2) has external linkage
// already exists, add a label attribute to it.
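//
// Illustrative usage (assumption, not from the original source):
//   #pragma redefine_extname statvfs statvfs64
//   int statvfs(const char *path, struct statvfs *buf);  // emitted as
//                                                        // 'statvfs64'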
if (PrevDecl && (isa<FunctionDecl>(PrevDecl) || isa<VarDecl>(PrevDecl))) {
if (isDeclExternC(PrevDecl))
PrevDecl->addAttr(Attr);
else
Diag(PrevDecl->getLocation(), diag::warn_redefine_extname_not_applied)
<< /*Variable*/(isa<FunctionDecl>(PrevDecl) ? 0 : 1) << PrevDecl;
// Otherwise, add a label attribute to ExtnameUndeclaredIdentifiers.
} else
(void)ExtnameUndeclaredIdentifiers.insert(std::make_pair(Name, Attr));
}
void Sema::ActOnPragmaWeakID(IdentifierInfo* Name,
SourceLocation PragmaLoc,
SourceLocation NameLoc) {
Decl *PrevDecl = LookupSingleName(TUScope, Name, NameLoc, LookupOrdinaryName);
if (PrevDecl) {
PrevDecl->addAttr(WeakAttr::CreateImplicit(Context, PragmaLoc, AttributeCommonInfo::AS_Pragma));
} else {
(void)WeakUndeclaredIdentifiers[Name].insert(WeakInfo(nullptr, NameLoc));
}
}
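// Illustrative usage (assumption, not from the original source):
//   #pragma weak weak_sym = strong_sym
// makes 'weak_sym' a weak alias of 'strong_sym'; if no matching declaration
// exists yet, the request is queued in WeakUndeclaredIdentifiers and applied
// when the declaration appears.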
void Sema::ActOnPragmaWeakAlias(IdentifierInfo* Name,
IdentifierInfo* AliasName,
SourceLocation PragmaLoc,
SourceLocation NameLoc,
SourceLocation AliasNameLoc) {
Decl *PrevDecl = LookupSingleName(TUScope, AliasName, AliasNameLoc,
LookupOrdinaryName);
WeakInfo W = WeakInfo(Name, NameLoc);
if (PrevDecl && (isa<FunctionDecl>(PrevDecl) || isa<VarDecl>(PrevDecl))) {
if (!PrevDecl->hasAttr<AliasAttr>())
if (NamedDecl *ND = dyn_cast<NamedDecl>(PrevDecl))
DeclApplyPragmaWeak(TUScope, ND, W);
} else {
(void)WeakUndeclaredIdentifiers[AliasName].insert(W);
}
}
ObjCContainerDecl *Sema::getObjCDeclContext() const {
return (dyn_cast_or_null<ObjCContainerDecl>(CurContext));
}
Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD,
bool Final) {
assert(FD && "Expected non-null FunctionDecl");
// SYCL functions can be templates, so we check whether they have the
// appropriate attribute prior to checking whether they are templates.
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>())
return FunctionEmissionStatus::Emitted;
// Templates are emitted when they're instantiated.
if (FD->isDependentContext())
return FunctionEmissionStatus::TemplateDiscarded;
// Check whether this function is an externally visible definition.
auto IsEmittedForExternalSymbol = [this, FD]() {
// We have to check the GVA linkage of the function's *definition* -- if we
// only have a declaration, we don't know whether or not the function will
// be emitted, because (say) the definition could include "inline".
FunctionDecl *Def = FD->getDefinition();
return Def && !isDiscardableGVALinkage(
getASTContext().GetGVALinkageForFunction(Def));
};
if (LangOpts.OpenMPIsDevice) {
// In OpenMP device mode we will not emit host-only functions, or functions
// we don't need due to their linkage.
std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
// DevTy may be changed later by
// #pragma omp declare target to(*) device_type(*).
// Therefore DevTy having no value does not imply host. The emission status
// will be checked again at the end of compilation unit with Final = true.
if (DevTy)
if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host)
return FunctionEmissionStatus::OMPDiscarded;
// If we have an explicit value for the device type, or we are in a target
// declare context, we need to emit all extern and used symbols.
if (isInOpenMPDeclareTargetContext() || DevTy)
if (IsEmittedForExternalSymbol())
return FunctionEmissionStatus::Emitted;
// Device mode only emits what it must; if a function wasn't tagged as
// needed by this point, we'll omit it.
if (Final)
return FunctionEmissionStatus::OMPDiscarded;
} else if (LangOpts.OpenMP > 45) {
// In OpenMP host compilation prior to 5.0 everything was an emitted host
// function. In 5.0, no_host was introduced, which might cause a function to
// be omitted.
std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
if (DevTy)
if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
return FunctionEmissionStatus::OMPDiscarded;
}
if (Final && LangOpts.OpenMP && !LangOpts.CUDA)
return FunctionEmissionStatus::Emitted;
if (LangOpts.CUDA) {
// When compiling for device, host functions are never emitted. Similarly,
// when compiling for host, device and global functions are never emitted.
// (Technically, we do emit a host-side stub for global functions, but this
// doesn't count for our purposes here.)
Sema::CUDAFunctionTarget T = IdentifyCUDATarget(FD);
if (LangOpts.CUDAIsDevice && T == Sema::CFT_Host)
return FunctionEmissionStatus::CUDADiscarded;
if (!LangOpts.CUDAIsDevice &&
(T == Sema::CFT_Device || T == Sema::CFT_Global))
return FunctionEmissionStatus::CUDADiscarded;
if (IsEmittedForExternalSymbol())
return FunctionEmissionStatus::Emitted;
}
// Otherwise, the function is known-emitted if it's in our set of
// known-emitted functions.
return FunctionEmissionStatus::Unknown;
}
bool Sema::shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee) {
// Host-side references to a __global__ function refer to the stub, so the
// function itself is never emitted and therefore should not be marked.
// If a host function calls a kernel which in turn calls a host+device
// function, the HD function does not get instantiated on the host. We model
// this by omitting the call to the kernel from the callgraph. This ensures
// that, when compiling for host, only HD functions actually called from the
// host get marked as known-emitted.
return LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
IdentifyCUDATarget(Callee) == CFT_Global;
}
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaInit.cpp b/contrib/llvm-project/clang/lib/Sema/SemaInit.cpp
index 44adb167dcc0..cc8d1405ec55 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaInit.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaInit.cpp
@@ -1,10599 +1,10666 @@
//===--- SemaInit.cpp - Semantic Analysis for Initializers ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for initializers.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/Designator.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/SemaInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
//===----------------------------------------------------------------------===//
// Sema Initialization Checking
//===----------------------------------------------------------------------===//
/// Check whether T is compatible with a wide character type (wchar_t,
/// char16_t or char32_t).
static bool IsWideCharCompatible(QualType T, ASTContext &Context) {
if (Context.typesAreCompatible(Context.getWideCharType(), T))
return true;
if (Context.getLangOpts().CPlusPlus || Context.getLangOpts().C11) {
return Context.typesAreCompatible(Context.Char16Ty, T) ||
Context.typesAreCompatible(Context.Char32Ty, T);
}
return false;
}
enum StringInitFailureKind {
SIF_None,
SIF_NarrowStringIntoWideChar,
SIF_WideStringIntoChar,
SIF_IncompatWideStringIntoWideChar,
SIF_UTF8StringIntoPlainChar,
SIF_PlainStringIntoUTF8Char,
SIF_Other
};
/// Check whether the array of type AT can be initialized by the Init
/// expression by means of string initialization. Returns SIF_None if so,
/// otherwise returns a StringInitFailureKind that describes why the
/// initialization would not work.
static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT,
ASTContext &Context) {
if (!isa<ConstantArrayType>(AT) && !isa<IncompleteArrayType>(AT))
return SIF_Other;
// See if this is a string literal or @encode.
Init = Init->IgnoreParens();
// Handle @encode, which is a narrow string.
if (isa<ObjCEncodeExpr>(Init) && AT->getElementType()->isCharType())
return SIF_None;
// Otherwise we can only handle string literals.
StringLiteral *SL = dyn_cast<StringLiteral>(Init);
if (!SL)
return SIF_Other;
const QualType ElemTy =
Context.getCanonicalType(AT->getElementType()).getUnqualifiedType();
auto IsCharOrUnsignedChar = [](const QualType &T) {
const BuiltinType *BT = dyn_cast<BuiltinType>(T.getTypePtr());
return BT && BT->isCharType() && BT->getKind() != BuiltinType::SChar;
};
switch (SL->getKind()) {
case StringLiteral::UTF8:
// char8_t array can be initialized with a UTF-8 string.
// - C++20 [dcl.init.string] (DR)
// Additionally, an array of char or unsigned char may be initialized
// by a UTF-8 string literal.
if (ElemTy->isChar8Type() ||
(Context.getLangOpts().Char8 &&
IsCharOrUnsignedChar(ElemTy.getCanonicalType())))
return SIF_None;
[[fallthrough]];
case StringLiteral::Ordinary:
// char array can be initialized with a narrow string.
// Only allow char x[] = "foo"; not char x[] = L"foo";
if (ElemTy->isCharType())
return (SL->getKind() == StringLiteral::UTF8 &&
Context.getLangOpts().Char8)
? SIF_UTF8StringIntoPlainChar
: SIF_None;
if (ElemTy->isChar8Type())
return SIF_PlainStringIntoUTF8Char;
if (IsWideCharCompatible(ElemTy, Context))
return SIF_NarrowStringIntoWideChar;
return SIF_Other;
// C99 6.7.8p15 (with correction from DR343), or C11 6.7.9p15:
// "An array with element type compatible with a qualified or unqualified
// version of wchar_t, char16_t, or char32_t may be initialized by a wide
// string literal with the corresponding encoding prefix (L, u, or U,
// respectively), optionally enclosed in braces.
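//
// Illustrative examples (assumption; on typical targets where wchar_t is not
// char16_t):
//   char16_t a[] = u"text";  // SIF_None
//   char b[] = u"text";      // SIF_WideStringIntoChar
//   wchar_t c[] = u"text";   // SIF_IncompatWideStringIntoWideChar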
case StringLiteral::UTF16:
if (Context.typesAreCompatible(Context.Char16Ty, ElemTy))
return SIF_None;
if (ElemTy->isCharType() || ElemTy->isChar8Type())
return SIF_WideStringIntoChar;
if (IsWideCharCompatible(ElemTy, Context))
return SIF_IncompatWideStringIntoWideChar;
return SIF_Other;
case StringLiteral::UTF32:
if (Context.typesAreCompatible(Context.Char32Ty, ElemTy))
return SIF_None;
if (ElemTy->isCharType() || ElemTy->isChar8Type())
return SIF_WideStringIntoChar;
if (IsWideCharCompatible(ElemTy, Context))
return SIF_IncompatWideStringIntoWideChar;
return SIF_Other;
case StringLiteral::Wide:
if (Context.typesAreCompatible(Context.getWideCharType(), ElemTy))
return SIF_None;
if (ElemTy->isCharType() || ElemTy->isChar8Type())
return SIF_WideStringIntoChar;
if (IsWideCharCompatible(ElemTy, Context))
return SIF_IncompatWideStringIntoWideChar;
return SIF_Other;
}
llvm_unreachable("missed a StringLiteral kind?");
}
static StringInitFailureKind IsStringInit(Expr *init, QualType declType,
ASTContext &Context) {
const ArrayType *arrayType = Context.getAsArrayType(declType);
if (!arrayType)
return SIF_Other;
return IsStringInit(init, arrayType, Context);
}
bool Sema::IsStringInit(Expr *Init, const ArrayType *AT) {
return ::IsStringInit(Init, AT, Context) == SIF_None;
}
/// Update the type of a string literal, including any surrounding parentheses,
/// to match the type of the object which it is initializing.
static void updateStringLiteralType(Expr *E, QualType Ty) {
while (true) {
E->setType(Ty);
E->setValueKind(VK_PRValue);
if (isa<StringLiteral>(E) || isa<ObjCEncodeExpr>(E)) {
break;
} else if (ParenExpr *PE = dyn_cast<ParenExpr>(E)) {
E = PE->getSubExpr();
} else if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
assert(UO->getOpcode() == UO_Extension);
E = UO->getSubExpr();
} else if (GenericSelectionExpr *GSE = dyn_cast<GenericSelectionExpr>(E)) {
E = GSE->getResultExpr();
} else if (ChooseExpr *CE = dyn_cast<ChooseExpr>(E)) {
E = CE->getChosenSubExpr();
} else {
llvm_unreachable("unexpected expr in string literal init");
}
}
}
/// Fix a compound literal initializing an array so it's correctly marked
/// as an rvalue.
static void updateGNUCompoundLiteralRValue(Expr *E) {
while (true) {
E->setValueKind(VK_PRValue);
if (isa<CompoundLiteralExpr>(E)) {
break;
} else if (ParenExpr *PE = dyn_cast<ParenExpr>(E)) {
E = PE->getSubExpr();
} else if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
assert(UO->getOpcode() == UO_Extension);
E = UO->getSubExpr();
} else if (GenericSelectionExpr *GSE = dyn_cast<GenericSelectionExpr>(E)) {
E = GSE->getResultExpr();
} else if (ChooseExpr *CE = dyn_cast<ChooseExpr>(E)) {
E = CE->getChosenSubExpr();
} else {
llvm_unreachable("unexpected expr in array compound literal init");
}
}
}
static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
Sema &S) {
// Get the length of the string as parsed.
auto *ConstantArrayTy =
cast<ConstantArrayType>(Str->getType()->getAsArrayTypeUnsafe());
uint64_t StrLength = ConstantArrayTy->getSize().getZExtValue();
if (const IncompleteArrayType *IAT = dyn_cast<IncompleteArrayType>(AT)) {
// C99 6.7.8p14. We have an array of character type with unknown size
// being initialized to a string literal.
llvm::APInt ConstVal(32, StrLength);
// Return a new array type (C99 6.7.8p22).
DeclT = S.Context.getConstantArrayType(IAT->getElementType(),
ConstVal, nullptr,
ArrayType::Normal, 0);
updateStringLiteralType(Str, DeclT);
return;
}
const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
// We have an array of character type with known size. However,
// the size may be smaller or larger than the string we are initializing.
// FIXME: Avoid truncation for 64-bit length strings.
if (S.getLangOpts().CPlusPlus) {
if (StringLiteral *SL = dyn_cast<StringLiteral>(Str->IgnoreParens())) {
// For Pascal strings it's OK to strip off the terminating null character,
// so the example below is valid:
//
// unsigned char a[2] = "\pa";
if (SL->isPascal())
StrLength--;
}
// [dcl.init.string]p2
if (StrLength > CAT->getSize().getZExtValue())
S.Diag(Str->getBeginLoc(),
diag::err_initializer_string_for_char_array_too_long)
<< CAT->getSize().getZExtValue() << StrLength
<< Str->getSourceRange();
} else {
// C99 6.7.8p14.
if (StrLength-1 > CAT->getSize().getZExtValue())
S.Diag(Str->getBeginLoc(),
diag::ext_initializer_string_for_char_array_too_long)
<< Str->getSourceRange();
}
// Set the type to the actual size that we are initializing. If we have
// something like:
// char x[1] = "foo";
// then this will set the string literal's type to char[1].
updateStringLiteralType(Str, DeclT);
}
//===----------------------------------------------------------------------===//
// Semantic checking for initializer lists.
//===----------------------------------------------------------------------===//
namespace {
/// Semantic checking for initializer lists.
///
/// The InitListChecker class contains a set of routines that each
/// handle the initialization of a certain kind of entity, e.g.,
/// arrays, vectors, struct/union types, scalars, etc. The
/// InitListChecker itself performs a recursive walk of the subobject
/// structure of the type to be initialized, while stepping through
/// the initializer list one element at a time. The IList and Index
/// parameters to each of the Check* routines contain the active
/// (syntactic) initializer list and the index into that initializer
/// list that represents the current initializer. Each routine is
/// responsible for moving that Index forward as it consumes elements.
///
/// Each Check* routine also has a StructuredList/StructuredIndex
/// arguments, which contains the current "structured" (semantic)
/// initializer list and the index into that initializer list where we
/// are copying initializers as we map them over to the semantic
/// list. Once we have completed our recursive walk of the subobject
/// structure, we will have constructed a full semantic initializer
/// list.
///
/// C99 designators cause changes in the initializer list traversal,
/// because they make the initialization "jump" into a specific
/// subobject and then continue the initialization from that
/// point. CheckDesignatedInitializer() recursively steps into the
/// designated subobject and manages backing out the recursion to
/// initialize the subobjects after the one designated.
///
/// If an initializer list contains any designators, we build a placeholder
/// structured list even in 'verify only' mode, so that we can track which
// elements need 'empty' initialization.
class InitListChecker {
Sema &SemaRef;
bool hadError = false;
bool VerifyOnly; // No diagnostics.
bool TreatUnavailableAsInvalid; // Used only in VerifyOnly mode.
bool InOverloadResolution;
InitListExpr *FullyStructuredList = nullptr;
NoInitExpr *DummyExpr = nullptr;
NoInitExpr *getDummyInit() {
if (!DummyExpr)
DummyExpr = new (SemaRef.Context) NoInitExpr(SemaRef.Context.VoidTy);
return DummyExpr;
}
void CheckImplicitInitList(const InitializedEntity &Entity,
InitListExpr *ParentIList, QualType T,
unsigned &Index, InitListExpr *StructuredList,
unsigned &StructuredIndex);
void CheckExplicitInitList(const InitializedEntity &Entity,
InitListExpr *IList, QualType &T,
InitListExpr *StructuredList,
bool TopLevelObject = false);
void CheckListElementTypes(const InitializedEntity &Entity,
InitListExpr *IList, QualType &DeclType,
bool SubobjectIsDesignatorContext,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex,
bool TopLevelObject = false);
void CheckSubElementType(const InitializedEntity &Entity,
InitListExpr *IList, QualType ElemType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex,
bool DirectlyDesignated = false);
void CheckComplexType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex);
void CheckScalarType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex);
void CheckReferenceType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex);
void CheckVectorType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType, unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex);
void CheckStructUnionTypes(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
CXXRecordDecl::base_class_range Bases,
RecordDecl::field_iterator Field,
bool SubobjectIsDesignatorContext, unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex,
bool TopLevelObject = false);
void CheckArrayType(const InitializedEntity &Entity,
InitListExpr *IList, QualType &DeclType,
llvm::APSInt elementIndex,
bool SubobjectIsDesignatorContext, unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex);
bool CheckDesignatedInitializer(const InitializedEntity &Entity,
InitListExpr *IList, DesignatedInitExpr *DIE,
unsigned DesigIdx,
QualType &CurrentObjectType,
RecordDecl::field_iterator *NextField,
llvm::APSInt *NextElementIndex,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex,
bool FinishSubobjectInit,
bool TopLevelObject);
InitListExpr *getStructuredSubobjectInit(InitListExpr *IList, unsigned Index,
QualType CurrentObjectType,
InitListExpr *StructuredList,
unsigned StructuredIndex,
SourceRange InitRange,
bool IsFullyOverwritten = false);
void UpdateStructuredListElement(InitListExpr *StructuredList,
unsigned &StructuredIndex,
Expr *expr);
InitListExpr *createInitListExpr(QualType CurrentObjectType,
SourceRange InitRange,
unsigned ExpectedNumInits);
int numArrayElements(QualType DeclType);
int numStructUnionElements(QualType DeclType);
ExprResult PerformEmptyInit(SourceLocation Loc,
const InitializedEntity &Entity);
/// Diagnose that OldInit (or part thereof) has been overridden by NewInit.
void diagnoseInitOverride(Expr *OldInit, SourceRange NewInitRange,
bool FullyOverwritten = true) {
// Overriding an initializer via a designator is valid with C99 designated
// initializers, but ill-formed with C++20 designated initializers.
unsigned DiagID = SemaRef.getLangOpts().CPlusPlus
? diag::ext_initializer_overrides
: diag::warn_initializer_overrides;
if (InOverloadResolution && SemaRef.getLangOpts().CPlusPlus) {
// In overload resolution, we have to strictly enforce the rules, and so
// don't allow any overriding of prior initializers. This matters for a
// case such as:
//
// union U { int a, b; };
// struct S { int a, b; };
// void f(U), f(S);
//
// Here, f({.a = 1, .b = 2}) is required to call the struct overload. For
// consistency, we disallow all overriding of prior initializers in
// overload resolution, not only overriding of union members.
hadError = true;
} else if (OldInit->getType().isDestructedType() && !FullyOverwritten) {
// If we'll be keeping around the old initializer but overwriting part of
// the object it initialized, and that object is not trivially
// destructible, this can leak. Don't allow that, not even as an
// extension.
//
// FIXME: It might be reasonable to allow this in cases where the part of
// the initializer that we're overriding has trivial destruction.
DiagID = diag::err_initializer_overrides_destructed;
} else if (!OldInit->getSourceRange().isValid()) {
// We need to check the source range's validity because the previous
// initializer does not have to be an explicit initializer, e.g.,
//
// struct P { int a, b; };
// struct PP { struct P p; } l = { { .a = 2 }, .p.b = 3 };
//
// There is an overwrite taking place because the first braced initializer
// list "{ .a = 2 }" already provides a value for .p.b (which is zero).
//
// Such overwrites are harmless, so we don't diagnose them. (Note that in
// C++, this cannot be reached unless we've already seen and diagnosed a
// different conformance issue, such as a mixture of designated and
// non-designated initializers or a multi-level designator.)
return;
}
if (!VerifyOnly) {
SemaRef.Diag(NewInitRange.getBegin(), DiagID)
<< NewInitRange << FullyOverwritten << OldInit->getType();
SemaRef.Diag(OldInit->getBeginLoc(), diag::note_previous_initializer)
<< (OldInit->HasSideEffects(SemaRef.Context) && FullyOverwritten)
<< OldInit->getSourceRange();
}
}
// Explanation on the "FillWithNoInit" mode:
//
// Assume we have the following definitions (Case#1):
// struct P { char x[6][6]; } xp = { .x[1] = "bar" };
// struct PP { struct P lp; } l = { .lp = xp, .lp.x[1][2] = 'f' };
//
// l.lp.x[1][0..1] should not be filled with implicit initializers because the
// "base" initializer "xp" will provide values for them; l.lp.x[1] will be "baf".
//
// But if we have (Case#2):
// struct PP l = { .lp = xp, .lp.x[1] = { [2] = 'f' } };
//
// l.lp.x[1][0..1] are implicitly initialized and do not use values from the
// "base" initializer; l.lp.x[1] will be "\0\0f\0\0\0".
//
// To distinguish Case#1 from Case#2, and also to avoid leaving many "holes"
// in the InitListExpr, the "holes" in Case#1 are filled not with empty
// initializers but with special "NoInitExpr" placeholders, which tell CodeGen
// not to generate any initializers for these parts.
void FillInEmptyInitForBase(unsigned Init, const CXXBaseSpecifier &Base,
const InitializedEntity &ParentEntity,
InitListExpr *ILE, bool &RequiresSecondPass,
bool FillWithNoInit);
void FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
const InitializedEntity &ParentEntity,
InitListExpr *ILE, bool &RequiresSecondPass,
bool FillWithNoInit = false);
void FillInEmptyInitializations(const InitializedEntity &Entity,
InitListExpr *ILE, bool &RequiresSecondPass,
InitListExpr *OuterILE, unsigned OuterIndex,
bool FillWithNoInit = false);
bool CheckFlexibleArrayInit(const InitializedEntity &Entity,
Expr *InitExpr, FieldDecl *Field,
bool TopLevelObject);
void CheckEmptyInitializable(const InitializedEntity &Entity,
SourceLocation Loc);
public:
InitListChecker(Sema &S, const InitializedEntity &Entity, InitListExpr *IL,
QualType &T, bool VerifyOnly, bool TreatUnavailableAsInvalid,
bool InOverloadResolution = false);
bool HadError() { return hadError; }
// Retrieves the fully-structured initializer list used for
// semantic analysis and code generation.
InitListExpr *getFullyStructuredList() const { return FullyStructuredList; }
};
} // end anonymous namespace
ExprResult InitListChecker::PerformEmptyInit(SourceLocation Loc,
const InitializedEntity &Entity) {
InitializationKind Kind = InitializationKind::CreateValue(Loc, Loc, Loc,
true);
MultiExprArg SubInit;
Expr *InitExpr;
InitListExpr DummyInitList(SemaRef.Context, Loc, std::nullopt, Loc);
// C++ [dcl.init.aggr]p7:
// If there are fewer initializer-clauses in the list than there are
// members in the aggregate, then each member not explicitly initialized
// ...
bool EmptyInitList = SemaRef.getLangOpts().CPlusPlus11 &&
Entity.getType()->getBaseElementTypeUnsafe()->isRecordType();
if (EmptyInitList) {
// C++1y / DR1070:
// shall be initialized [...] from an empty initializer list.
//
// We apply the resolution of this DR to C++11 but not C++98, since C++98
// does not have useful semantics for initialization from an init list.
// We treat this as copy-initialization, because aggregate initialization
// always performs copy-initialization on its elements.
//
// Only do this if we're initializing a class type, to avoid filling in
// the initializer list where possible.
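//
// For example, given 'struct A { std::string s; }; A a = {};', the omitted
// member 'a.s' is initialized as if by 'std::string s = {}' (copy-initialized
// from an empty list) rather than value-initialized as in C++98.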
InitExpr = VerifyOnly
? &DummyInitList
: new (SemaRef.Context)
InitListExpr(SemaRef.Context, Loc, std::nullopt, Loc);
InitExpr->setType(SemaRef.Context.VoidTy);
SubInit = InitExpr;
Kind = InitializationKind::CreateCopy(Loc, Loc);
} else {
// C++03:
// shall be value-initialized.
}
InitializationSequence InitSeq(SemaRef, Entity, Kind, SubInit);
// libstdc++4.6 marks the vector default constructor as explicit in
// _GLIBCXX_DEBUG mode, so recover using the C++03 logic in that case.
// stlport does so too. Look for std::__debug for libstdc++, and for
// std:: for stlport. This is effectively a compiler-side implementation of
// LWG2193.
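// For example, with such a library, 'struct S { std::vector<int> v; }; S s = {};'
// would fail to copy-initialize 's.v' from '{}' because the default constructor
// is explicit; we recover by value-initializing it, matching the C++03 rule.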
if (!InitSeq && EmptyInitList && InitSeq.getFailureKind() ==
InitializationSequence::FK_ExplicitConstructor) {
OverloadCandidateSet::iterator Best;
OverloadingResult O =
InitSeq.getFailedCandidateSet()
.BestViableFunction(SemaRef, Kind.getLocation(), Best);
(void)O;
assert(O == OR_Success && "Inconsistent overload resolution");
CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);
CXXRecordDecl *R = CtorDecl->getParent();
if (CtorDecl->getMinRequiredArguments() == 0 &&
CtorDecl->isExplicit() && R->getDeclName() &&
SemaRef.SourceMgr.isInSystemHeader(CtorDecl->getLocation())) {
bool IsInStd = false;
for (NamespaceDecl *ND = dyn_cast<NamespaceDecl>(R->getDeclContext());
ND && !IsInStd; ND = dyn_cast<NamespaceDecl>(ND->getParent())) {
if (SemaRef.getStdNamespace()->InEnclosingNamespaceSetOf(ND))
IsInStd = true;
}
if (IsInStd && llvm::StringSwitch<bool>(R->getName())
.Cases("basic_string", "deque", "forward_list", true)
.Cases("list", "map", "multimap", "multiset", true)
.Cases("priority_queue", "queue", "set", "stack", true)
.Cases("unordered_map", "unordered_set", "vector", true)
.Default(false)) {
InitSeq.InitializeFrom(
SemaRef, Entity,
InitializationKind::CreateValue(Loc, Loc, Loc, true),
MultiExprArg(), /*TopLevelOfInitList=*/false,
TreatUnavailableAsInvalid);
// Emit a warning for this. System header warnings aren't shown
// by default, but people working on system headers should see it.
if (!VerifyOnly) {
SemaRef.Diag(CtorDecl->getLocation(),
diag::warn_invalid_initializer_from_system_header);
if (Entity.getKind() == InitializedEntity::EK_Member)
SemaRef.Diag(Entity.getDecl()->getLocation(),
diag::note_used_in_initialization_here);
else if (Entity.getKind() == InitializedEntity::EK_ArrayElement)
SemaRef.Diag(Loc, diag::note_used_in_initialization_here);
}
}
}
}
if (!InitSeq) {
if (!VerifyOnly) {
InitSeq.Diagnose(SemaRef, Entity, Kind, SubInit);
if (Entity.getKind() == InitializedEntity::EK_Member)
SemaRef.Diag(Entity.getDecl()->getLocation(),
diag::note_in_omitted_aggregate_initializer)
<< /*field*/1 << Entity.getDecl();
else if (Entity.getKind() == InitializedEntity::EK_ArrayElement) {
bool IsTrailingArrayNewMember =
Entity.getParent() &&
Entity.getParent()->isVariableLengthArrayNew();
SemaRef.Diag(Loc, diag::note_in_omitted_aggregate_initializer)
<< (IsTrailingArrayNewMember ? 2 : /*array element*/0)
<< Entity.getElementIndex();
}
}
hadError = true;
return ExprError();
}
return VerifyOnly ? ExprResult()
: InitSeq.Perform(SemaRef, Entity, Kind, SubInit);
}
void InitListChecker::CheckEmptyInitializable(const InitializedEntity &Entity,
SourceLocation Loc) {
// If we're building a fully-structured list, we'll check this at the end
// once we know which elements are actually initialized. Otherwise, we know
// that there are no designators so we can just check now.
if (FullyStructuredList)
return;
PerformEmptyInit(Loc, Entity);
}
void InitListChecker::FillInEmptyInitForBase(
unsigned Init, const CXXBaseSpecifier &Base,
const InitializedEntity &ParentEntity, InitListExpr *ILE,
bool &RequiresSecondPass, bool FillWithNoInit) {
InitializedEntity BaseEntity = InitializedEntity::InitializeBase(
SemaRef.Context, &Base, false, &ParentEntity);
if (Init >= ILE->getNumInits() || !ILE->getInit(Init)) {
ExprResult BaseInit = FillWithNoInit
? new (SemaRef.Context) NoInitExpr(Base.getType())
: PerformEmptyInit(ILE->getEndLoc(), BaseEntity);
if (BaseInit.isInvalid()) {
hadError = true;
return;
}
if (!VerifyOnly) {
assert(Init < ILE->getNumInits() && "should have been expanded");
ILE->setInit(Init, BaseInit.getAs<Expr>());
}
} else if (InitListExpr *InnerILE =
dyn_cast<InitListExpr>(ILE->getInit(Init))) {
FillInEmptyInitializations(BaseEntity, InnerILE, RequiresSecondPass,
ILE, Init, FillWithNoInit);
} else if (DesignatedInitUpdateExpr *InnerDIUE =
dyn_cast<DesignatedInitUpdateExpr>(ILE->getInit(Init))) {
FillInEmptyInitializations(BaseEntity, InnerDIUE->getUpdater(),
RequiresSecondPass, ILE, Init,
/*FillWithNoInit =*/true);
}
}
void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
const InitializedEntity &ParentEntity,
InitListExpr *ILE,
bool &RequiresSecondPass,
bool FillWithNoInit) {
SourceLocation Loc = ILE->getEndLoc();
unsigned NumInits = ILE->getNumInits();
InitializedEntity MemberEntity
= InitializedEntity::InitializeMember(Field, &ParentEntity);
if (Init >= NumInits || !ILE->getInit(Init)) {
if (const RecordType *RType = ILE->getType()->getAs<RecordType>())
if (!RType->getDecl()->isUnion())
assert((Init < NumInits || VerifyOnly) &&
"This ILE should have been expanded");
if (FillWithNoInit) {
assert(!VerifyOnly && "should not fill with no-init in verify-only mode");
Expr *Filler = new (SemaRef.Context) NoInitExpr(Field->getType());
if (Init < NumInits)
ILE->setInit(Init, Filler);
else
ILE->updateInit(SemaRef.Context, Init, Filler);
return;
}
// C++1y [dcl.init.aggr]p7:
// If there are fewer initializer-clauses in the list than there are
// members in the aggregate, then each member not explicitly initialized
// shall be initialized from its brace-or-equal-initializer [...]
if (Field->hasInClassInitializer()) {
if (VerifyOnly)
return;
ExprResult DIE = SemaRef.BuildCXXDefaultInitExpr(Loc, Field);
if (DIE.isInvalid()) {
hadError = true;
return;
}
SemaRef.checkInitializerLifetime(MemberEntity, DIE.get());
if (Init < NumInits)
ILE->setInit(Init, DIE.get());
else {
ILE->updateInit(SemaRef.Context, Init, DIE.get());
RequiresSecondPass = true;
}
return;
}
if (Field->getType()->isReferenceType()) {
if (!VerifyOnly) {
// C++ [dcl.init.aggr]p9:
// If an incomplete or empty initializer-list leaves a
// member of reference type uninitialized, the program is
// ill-formed.
SemaRef.Diag(Loc, diag::err_init_reference_member_uninitialized)
<< Field->getType()
<< (ILE->isSyntacticForm() ? ILE : ILE->getSyntacticForm())
->getSourceRange();
SemaRef.Diag(Field->getLocation(), diag::note_uninit_reference_member);
}
hadError = true;
return;
}
ExprResult MemberInit = PerformEmptyInit(Loc, MemberEntity);
if (MemberInit.isInvalid()) {
hadError = true;
return;
}
if (hadError || VerifyOnly) {
// Do nothing
} else if (Init < NumInits) {
ILE->setInit(Init, MemberInit.getAs<Expr>());
} else if (!isa<ImplicitValueInitExpr>(MemberInit.get())) {
// Empty initialization requires a constructor call, so
// extend the initializer list to include the constructor
// call and make a note that we'll need to take another pass
// through the initializer list.
ILE->updateInit(SemaRef.Context, Init, MemberInit.getAs<Expr>());
RequiresSecondPass = true;
}
} else if (InitListExpr *InnerILE
= dyn_cast<InitListExpr>(ILE->getInit(Init))) {
FillInEmptyInitializations(MemberEntity, InnerILE,
RequiresSecondPass, ILE, Init, FillWithNoInit);
} else if (DesignatedInitUpdateExpr *InnerDIUE =
dyn_cast<DesignatedInitUpdateExpr>(ILE->getInit(Init))) {
FillInEmptyInitializations(MemberEntity, InnerDIUE->getUpdater(),
RequiresSecondPass, ILE, Init,
/*FillWithNoInit =*/true);
}
}
/// Recursively replaces NULL values within the given initializer list
/// with expressions that perform value-initialization of the
/// appropriate type, and finish off the InitListExpr formation.
void
InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
InitListExpr *ILE,
bool &RequiresSecondPass,
InitListExpr *OuterILE,
unsigned OuterIndex,
bool FillWithNoInit) {
assert((ILE->getType() != SemaRef.Context.VoidTy) &&
"Should not have void type");
// We don't need to do any checks when just filling NoInitExprs; that can't
// fail.
if (FillWithNoInit && VerifyOnly)
return;
// If this is a nested initializer list, we might have changed its contents
// (and therefore some of its properties, such as instantiation-dependence)
// while filling it in. Inform the outer initializer list so that its state
// can be updated to match.
// FIXME: We should fully build the inner initializers before constructing
// the outer InitListExpr instead of mutating AST nodes after they have
// been used as subexpressions of other nodes.
struct UpdateOuterILEWithUpdatedInit {
InitListExpr *Outer;
unsigned OuterIndex;
~UpdateOuterILEWithUpdatedInit() {
if (Outer)
Outer->setInit(OuterIndex, Outer->getInit(OuterIndex));
}
} UpdateOuterRAII = {OuterILE, OuterIndex};
// A transparent ILE is not performing aggregate initialization and should
// not be filled in.
if (ILE->isTransparent())
return;
if (const RecordType *RType = ILE->getType()->getAs<RecordType>()) {
const RecordDecl *RDecl = RType->getDecl();
if (RDecl->isUnion() && ILE->getInitializedFieldInUnion())
FillInEmptyInitForField(0, ILE->getInitializedFieldInUnion(),
Entity, ILE, RequiresSecondPass, FillWithNoInit);
else if (RDecl->isUnion() && isa<CXXRecordDecl>(RDecl) &&
cast<CXXRecordDecl>(RDecl)->hasInClassInitializer()) {
for (auto *Field : RDecl->fields()) {
if (Field->hasInClassInitializer()) {
FillInEmptyInitForField(0, Field, Entity, ILE, RequiresSecondPass,
FillWithNoInit);
break;
}
}
} else {
// The fields beyond ILE->getNumInits() are default initialized, so in
// order to leave them uninitialized, the ILE is expanded and the extra
// fields are then filled with NoInitExpr.
unsigned NumElems = numStructUnionElements(ILE->getType());
if (RDecl->hasFlexibleArrayMember())
++NumElems;
if (!VerifyOnly && ILE->getNumInits() < NumElems)
ILE->resizeInits(SemaRef.Context, NumElems);
unsigned Init = 0;
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RDecl)) {
for (auto &Base : CXXRD->bases()) {
if (hadError)
return;
FillInEmptyInitForBase(Init, Base, Entity, ILE, RequiresSecondPass,
FillWithNoInit);
++Init;
}
}
for (auto *Field : RDecl->fields()) {
if (Field->isUnnamedBitfield())
continue;
if (hadError)
return;
FillInEmptyInitForField(Init, Field, Entity, ILE, RequiresSecondPass,
FillWithNoInit);
if (hadError)
return;
++Init;
// Only look at the first initialization of a union.
if (RDecl->isUnion())
break;
}
}
return;
}
QualType ElementType;
InitializedEntity ElementEntity = Entity;
unsigned NumInits = ILE->getNumInits();
unsigned NumElements = NumInits;
if (const ArrayType *AType = SemaRef.Context.getAsArrayType(ILE->getType())) {
ElementType = AType->getElementType();
if (const auto *CAType = dyn_cast<ConstantArrayType>(AType))
NumElements = CAType->getSize().getZExtValue();
// For an array new with an unknown bound, ask for one additional element
// in order to populate the array filler.
if (Entity.isVariableLengthArrayNew())
++NumElements;
ElementEntity = InitializedEntity::InitializeElement(SemaRef.Context,
0, Entity);
} else if (const VectorType *VType = ILE->getType()->getAs<VectorType>()) {
ElementType = VType->getElementType();
NumElements = VType->getNumElements();
ElementEntity = InitializedEntity::InitializeElement(SemaRef.Context,
0, Entity);
} else
ElementType = ILE->getType();
bool SkipEmptyInitChecks = false;
for (unsigned Init = 0; Init != NumElements; ++Init) {
if (hadError)
return;
if (ElementEntity.getKind() == InitializedEntity::EK_ArrayElement ||
ElementEntity.getKind() == InitializedEntity::EK_VectorElement)
ElementEntity.setElementIndex(Init);
if (Init >= NumInits && (ILE->hasArrayFiller() || SkipEmptyInitChecks))
return;
Expr *InitExpr = (Init < NumInits ? ILE->getInit(Init) : nullptr);
if (!InitExpr && Init < NumInits && ILE->hasArrayFiller())
ILE->setInit(Init, ILE->getArrayFiller());
else if (!InitExpr && !ILE->hasArrayFiller()) {
// In VerifyOnly mode, there's no point performing empty initialization
// more than once.
if (SkipEmptyInitChecks)
continue;
Expr *Filler = nullptr;
if (FillWithNoInit)
Filler = new (SemaRef.Context) NoInitExpr(ElementType);
else {
ExprResult ElementInit =
PerformEmptyInit(ILE->getEndLoc(), ElementEntity);
if (ElementInit.isInvalid()) {
hadError = true;
return;
}
Filler = ElementInit.getAs<Expr>();
}
if (hadError) {
// Do nothing
} else if (VerifyOnly) {
SkipEmptyInitChecks = true;
} else if (Init < NumInits) {
// For arrays, just set the expression used for value-initialization
// of the "holes" in the array.
if (ElementEntity.getKind() == InitializedEntity::EK_ArrayElement)
ILE->setArrayFiller(Filler);
else
ILE->setInit(Init, Filler);
} else {
// For arrays, just set the expression used for value-initialization
// of the rest of elements and exit.
if (ElementEntity.getKind() == InitializedEntity::EK_ArrayElement) {
ILE->setArrayFiller(Filler);
return;
}
if (!isa<ImplicitValueInitExpr>(Filler) && !isa<NoInitExpr>(Filler)) {
// Empty initialization requires a constructor call, so
// extend the initializer list to include the constructor
// call and make a note that we'll need to take another pass
// through the initializer list.
ILE->updateInit(SemaRef.Context, Init, Filler);
RequiresSecondPass = true;
}
}
} else if (InitListExpr *InnerILE
= dyn_cast_or_null<InitListExpr>(InitExpr)) {
FillInEmptyInitializations(ElementEntity, InnerILE, RequiresSecondPass,
ILE, Init, FillWithNoInit);
} else if (DesignatedInitUpdateExpr *InnerDIUE =
dyn_cast_or_null<DesignatedInitUpdateExpr>(InitExpr)) {
FillInEmptyInitializations(ElementEntity, InnerDIUE->getUpdater(),
RequiresSecondPass, ILE, Init,
/*FillWithNoInit =*/true);
}
}
}
static bool hasAnyDesignatedInits(const InitListExpr *IL) {
for (const Stmt *Init : *IL)
if (Init && isa<DesignatedInitExpr>(Init))
return true;
return false;
}
InitListChecker::InitListChecker(Sema &S, const InitializedEntity &Entity,
InitListExpr *IL, QualType &T, bool VerifyOnly,
bool TreatUnavailableAsInvalid,
bool InOverloadResolution)
: SemaRef(S), VerifyOnly(VerifyOnly),
TreatUnavailableAsInvalid(TreatUnavailableAsInvalid),
InOverloadResolution(InOverloadResolution) {
if (!VerifyOnly || hasAnyDesignatedInits(IL)) {
FullyStructuredList =
createInitListExpr(T, IL->getSourceRange(), IL->getNumInits());
// FIXME: Check that IL isn't already the semantic form of some other
// InitListExpr. If it is, we'd create a broken AST.
if (!VerifyOnly)
FullyStructuredList->setSyntacticForm(IL);
}
CheckExplicitInitList(Entity, IL, T, FullyStructuredList,
/*TopLevelObject=*/true);
if (!hadError && FullyStructuredList) {
bool RequiresSecondPass = false;
FillInEmptyInitializations(Entity, FullyStructuredList, RequiresSecondPass,
/*OuterILE=*/nullptr, /*OuterIndex=*/0);
if (RequiresSecondPass && !hadError)
FillInEmptyInitializations(Entity, FullyStructuredList,
RequiresSecondPass, nullptr, 0);
}
if (hadError && FullyStructuredList)
FullyStructuredList->markError();
}
int InitListChecker::numArrayElements(QualType DeclType) {
// FIXME: use a proper constant
int maxElements = 0x7FFFFFFF;
if (const ConstantArrayType *CAT =
SemaRef.Context.getAsConstantArrayType(DeclType)) {
maxElements = static_cast<int>(CAT->getSize().getZExtValue());
}
return maxElements;
}
int InitListChecker::numStructUnionElements(QualType DeclType) {
RecordDecl *structDecl = DeclType->castAs<RecordType>()->getDecl();
int InitializableMembers = 0;
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(structDecl))
InitializableMembers += CXXRD->getNumBases();
for (const auto *Field : structDecl->fields())
if (!Field->isUnnamedBitfield())
++InitializableMembers;
if (structDecl->isUnion())
return std::min(InitializableMembers, 1);
return InitializableMembers - structDecl->hasFlexibleArrayMember();
}
/// Determine whether Entity is an entity for which it is idiomatic to elide
/// the braces in aggregate initialization.
static bool isIdiomaticBraceElisionEntity(const InitializedEntity &Entity) {
// Recursive initialization of the one and only field within an aggregate
// class is considered idiomatic. This case arises in particular for
// initialization of std::array, where the C++ standard suggests the idiom of
//
// std::array<T, N> arr = {1, 2, 3};
//
// (where std::array is an aggregate struct containing a single array field).
if (!Entity.getParent())
return false;
// Allow brace elision for aggregates with an empty base.
if (Entity.getKind() == InitializedEntity::EK_Base) {
auto *ParentRD =
Entity.getParent()->getType()->castAs<RecordType>()->getDecl();
CXXRecordDecl *CXXRD = cast<CXXRecordDecl>(ParentRD);
return CXXRD->getNumBases() == 1 && CXXRD->field_empty();
}
// Allow brace elision if the only subobject is a field.
if (Entity.getKind() == InitializedEntity::EK_Member) {
auto *ParentRD =
Entity.getParent()->getType()->castAs<RecordType>()->getDecl();
if (CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(ParentRD)) {
if (CXXRD->getNumBases()) {
return false;
}
}
auto FieldIt = ParentRD->field_begin();
assert(FieldIt != ParentRD->field_end() &&
"no fields but have initializer for member?");
return ++FieldIt == ParentRD->field_end();
}
return false;
}
/// Check whether the range of the initializer \p ParentIList from element
/// \p Index onwards can be used to initialize an object of type \p T. Update
/// \p Index to indicate how many elements of the list were consumed.
///
/// This also fills in \p StructuredList, from element \p StructuredIndex
/// onwards, with the fully-braced, desugared form of the initialization.
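///
/// For example, in 'int m[2][2] = { 1, 2, 3, 4 };' each row of 'm' is an
/// implicit (brace-elided) sub-list that consumes two elements of the parent
/// list; the fully-braced form is '{ {1, 2}, {3, 4} }'.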
void InitListChecker::CheckImplicitInitList(const InitializedEntity &Entity,
InitListExpr *ParentIList,
QualType T, unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex) {
int maxElements = 0;
if (T->isArrayType())
maxElements = numArrayElements(T);
else if (T->isRecordType())
maxElements = numStructUnionElements(T);
else if (T->isVectorType())
maxElements = T->castAs<VectorType>()->getNumElements();
else
llvm_unreachable("CheckImplicitInitList(): Illegal type");
if (maxElements == 0) {
if (!VerifyOnly)
SemaRef.Diag(ParentIList->getInit(Index)->getBeginLoc(),
diag::err_implicit_empty_initializer);
++Index;
hadError = true;
return;
}
// Build a structured initializer list corresponding to this subobject.
InitListExpr *StructuredSubobjectInitList = getStructuredSubobjectInit(
ParentIList, Index, T, StructuredList, StructuredIndex,
SourceRange(ParentIList->getInit(Index)->getBeginLoc(),
ParentIList->getSourceRange().getEnd()));
unsigned StructuredSubobjectInitIndex = 0;
// Check the element types and build the structural subobject.
unsigned StartIndex = Index;
CheckListElementTypes(Entity, ParentIList, T,
/*SubobjectIsDesignatorContext=*/false, Index,
StructuredSubobjectInitList,
StructuredSubobjectInitIndex);
if (StructuredSubobjectInitList) {
StructuredSubobjectInitList->setType(T);
unsigned EndIndex = (Index == StartIndex? StartIndex : Index - 1);
// Update the structured sub-object initializer so that its ending
// range corresponds with the end of the last initializer it used.
if (EndIndex < ParentIList->getNumInits() &&
ParentIList->getInit(EndIndex)) {
SourceLocation EndLoc
= ParentIList->getInit(EndIndex)->getSourceRange().getEnd();
StructuredSubobjectInitList->setRBraceLoc(EndLoc);
}
// Complain about missing braces.
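// For example, 'struct Pair { int a, b; }; Pair arr[2] = { 1, 2, 3, 4 };'
// elides the braces around each Pair; the fix-it suggests writing
// '{ {1, 2}, {3, 4} }' instead.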
if (!VerifyOnly && (T->isArrayType() || T->isRecordType()) &&
!ParentIList->isIdiomaticZeroInitializer(SemaRef.getLangOpts()) &&
!isIdiomaticBraceElisionEntity(Entity)) {
SemaRef.Diag(StructuredSubobjectInitList->getBeginLoc(),
diag::warn_missing_braces)
<< StructuredSubobjectInitList->getSourceRange()
<< FixItHint::CreateInsertion(
StructuredSubobjectInitList->getBeginLoc(), "{")
<< FixItHint::CreateInsertion(
SemaRef.getLocForEndOfToken(
StructuredSubobjectInitList->getEndLoc()),
"}");
}
// Warn if this type won't be an aggregate in future versions of C++.
auto *CXXRD = T->getAsCXXRecordDecl();
if (!VerifyOnly && CXXRD && CXXRD->hasUserDeclaredConstructor()) {
SemaRef.Diag(StructuredSubobjectInitList->getBeginLoc(),
diag::warn_cxx20_compat_aggregate_init_with_ctors)
<< StructuredSubobjectInitList->getSourceRange() << T;
}
}
}
/// Warn that \p Entity was of scalar type and was initialized by a
/// single-element braced initializer list.
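///
/// For instance, in C, 'struct S { int a; } s = { {1} };' wraps the initializer
/// for the scalar member 'a' in redundant braces.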
static void warnBracedScalarInit(Sema &S, const InitializedEntity &Entity,
SourceRange Braces) {
// Don't warn during template instantiation. If the initialization was
// non-dependent, we warned during the initial parse; otherwise, the
// type might not be scalar in some uses of the template.
if (S.inTemplateInstantiation())
return;
unsigned DiagID = 0;
switch (Entity.getKind()) {
case InitializedEntity::EK_VectorElement:
case InitializedEntity::EK_ComplexElement:
case InitializedEntity::EK_ArrayElement:
case InitializedEntity::EK_Parameter:
case InitializedEntity::EK_Parameter_CF_Audited:
case InitializedEntity::EK_TemplateParameter:
case InitializedEntity::EK_Result:
+ case InitializedEntity::EK_ParenAggInitMember:
// Extra braces here are suspicious.
DiagID = diag::warn_braces_around_init;
break;
case InitializedEntity::EK_Member:
// Warn on aggregate initialization but not on ctor init list or
// default member initializer.
if (Entity.getParent())
DiagID = diag::warn_braces_around_init;
break;
case InitializedEntity::EK_Variable:
case InitializedEntity::EK_LambdaCapture:
// No warning, might be direct-list-initialization.
// FIXME: Should we warn for copy-list-initialization in these cases?
break;
case InitializedEntity::EK_New:
case InitializedEntity::EK_Temporary:
case InitializedEntity::EK_CompoundLiteralInit:
// No warning, braces are part of the syntax of the underlying construct.
break;
case InitializedEntity::EK_RelatedResult:
// No warning, we already warned when initializing the result.
break;
case InitializedEntity::EK_Exception:
case InitializedEntity::EK_Base:
case InitializedEntity::EK_Delegating:
case InitializedEntity::EK_BlockElement:
case InitializedEntity::EK_LambdaToBlockConversionBlockElement:
case InitializedEntity::EK_Binding:
case InitializedEntity::EK_StmtExprResult:
llvm_unreachable("unexpected braced scalar init");
}
if (DiagID) {
S.Diag(Braces.getBegin(), DiagID)
<< Entity.getType()->isSizelessBuiltinType() << Braces
<< FixItHint::CreateRemoval(Braces.getBegin())
<< FixItHint::CreateRemoval(Braces.getEnd());
}
}
/// Check whether the initializer \p IList (that was written with explicit
/// braces) can be used to initialize an object of type \p T.
///
/// This also fills in \p StructuredList with the fully-braced, desugared
/// form of the initialization.
void InitListChecker::CheckExplicitInitList(const InitializedEntity &Entity,
InitListExpr *IList, QualType &T,
InitListExpr *StructuredList,
bool TopLevelObject) {
unsigned Index = 0, StructuredIndex = 0;
CheckListElementTypes(Entity, IList, T, /*SubobjectIsDesignatorContext=*/true,
Index, StructuredList, StructuredIndex, TopLevelObject);
if (StructuredList) {
QualType ExprTy = T;
if (!ExprTy->isArrayType())
ExprTy = ExprTy.getNonLValueExprType(SemaRef.Context);
if (!VerifyOnly)
IList->setType(ExprTy);
StructuredList->setType(ExprTy);
}
if (hadError)
return;
// Don't complain for incomplete types, since we'll get an error elsewhere.
if (Index < IList->getNumInits() && !T->isIncompleteType()) {
// We have leftover initializers
bool ExtraInitsIsError = SemaRef.getLangOpts().CPlusPlus ||
(SemaRef.getLangOpts().OpenCL && T->isVectorType());
hadError = ExtraInitsIsError;
if (VerifyOnly) {
return;
} else if (StructuredIndex == 1 &&
IsStringInit(StructuredList->getInit(0), T, SemaRef.Context) ==
SIF_None) {
unsigned DK =
ExtraInitsIsError
? diag::err_excess_initializers_in_char_array_initializer
: diag::ext_excess_initializers_in_char_array_initializer;
SemaRef.Diag(IList->getInit(Index)->getBeginLoc(), DK)
<< IList->getInit(Index)->getSourceRange();
} else if (T->isSizelessBuiltinType()) {
unsigned DK = ExtraInitsIsError
? diag::err_excess_initializers_for_sizeless_type
: diag::ext_excess_initializers_for_sizeless_type;
SemaRef.Diag(IList->getInit(Index)->getBeginLoc(), DK)
<< T << IList->getInit(Index)->getSourceRange();
} else {
int initKind = T->isArrayType() ? 0 :
T->isVectorType() ? 1 :
T->isScalarType() ? 2 :
T->isUnionType() ? 3 :
4;
unsigned DK = ExtraInitsIsError ? diag::err_excess_initializers
: diag::ext_excess_initializers;
SemaRef.Diag(IList->getInit(Index)->getBeginLoc(), DK)
<< initKind << IList->getInit(Index)->getSourceRange();
}
}
if (!VerifyOnly) {
if (T->isScalarType() && IList->getNumInits() == 1 &&
!isa<InitListExpr>(IList->getInit(0)))
warnBracedScalarInit(SemaRef, Entity, IList->getSourceRange());
// Warn if this is a class type that won't be an aggregate in future
// versions of C++.
auto *CXXRD = T->getAsCXXRecordDecl();
if (CXXRD && CXXRD->hasUserDeclaredConstructor()) {
// Don't warn if there's an equivalent default constructor that would be
// used instead.
bool HasEquivCtor = false;
if (IList->getNumInits() == 0) {
auto *CD = SemaRef.LookupDefaultConstructor(CXXRD);
HasEquivCtor = CD && !CD->isDeleted();
}
if (!HasEquivCtor) {
SemaRef.Diag(IList->getBeginLoc(),
diag::warn_cxx20_compat_aggregate_init_with_ctors)
<< IList->getSourceRange() << T;
}
}
}
}
void InitListChecker::CheckListElementTypes(const InitializedEntity &Entity,
InitListExpr *IList,
QualType &DeclType,
bool SubobjectIsDesignatorContext,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex,
bool TopLevelObject) {
if (DeclType->isAnyComplexType() && SubobjectIsDesignatorContext) {
// Explicitly braced initializer for complex type can be real+imaginary
// parts.
CheckComplexType(Entity, IList, DeclType, Index,
StructuredList, StructuredIndex);
} else if (DeclType->isScalarType()) {
CheckScalarType(Entity, IList, DeclType, Index,
StructuredList, StructuredIndex);
} else if (DeclType->isVectorType()) {
CheckVectorType(Entity, IList, DeclType, Index,
StructuredList, StructuredIndex);
} else if (DeclType->isRecordType()) {
assert(DeclType->isAggregateType() &&
"non-aggregate records should be handed in CheckSubElementType");
RecordDecl *RD = DeclType->castAs<RecordType>()->getDecl();
auto Bases =
CXXRecordDecl::base_class_range(CXXRecordDecl::base_class_iterator(),
CXXRecordDecl::base_class_iterator());
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
Bases = CXXRD->bases();
CheckStructUnionTypes(Entity, IList, DeclType, Bases, RD->field_begin(),
SubobjectIsDesignatorContext, Index, StructuredList,
StructuredIndex, TopLevelObject);
} else if (DeclType->isArrayType()) {
llvm::APSInt Zero(
SemaRef.Context.getTypeSize(SemaRef.Context.getSizeType()),
false);
CheckArrayType(Entity, IList, DeclType, Zero,
SubobjectIsDesignatorContext, Index,
StructuredList, StructuredIndex);
} else if (DeclType->isVoidType() || DeclType->isFunctionType()) {
// This type is invalid, issue a diagnostic.
++Index;
if (!VerifyOnly)
SemaRef.Diag(IList->getBeginLoc(), diag::err_illegal_initializer_type)
<< DeclType;
hadError = true;
} else if (DeclType->isReferenceType()) {
CheckReferenceType(Entity, IList, DeclType, Index,
StructuredList, StructuredIndex);
} else if (DeclType->isObjCObjectType()) {
if (!VerifyOnly)
SemaRef.Diag(IList->getBeginLoc(), diag::err_init_objc_class) << DeclType;
hadError = true;
} else if (DeclType->isOCLIntelSubgroupAVCType() ||
DeclType->isSizelessBuiltinType()) {
// Checks for scalar type are sufficient for these types too.
CheckScalarType(Entity, IList, DeclType, Index, StructuredList,
StructuredIndex);
} else {
if (!VerifyOnly)
SemaRef.Diag(IList->getBeginLoc(), diag::err_illegal_initializer_type)
<< DeclType;
hadError = true;
}
}
void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
InitListExpr *IList,
QualType ElemType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex,
bool DirectlyDesignated) {
Expr *expr = IList->getInit(Index);
if (ElemType->isReferenceType())
return CheckReferenceType(Entity, IList, ElemType, Index,
StructuredList, StructuredIndex);
if (InitListExpr *SubInitList = dyn_cast<InitListExpr>(expr)) {
if (SubInitList->getNumInits() == 1 &&
IsStringInit(SubInitList->getInit(0), ElemType, SemaRef.Context) ==
SIF_None) {
// FIXME: It would be more faithful and no less correct to include an
// InitListExpr in the semantic form of the initializer list in this case.
expr = SubInitList->getInit(0);
}
// Nested aggregate initialization and C++ initialization are handled later.
} else if (isa<ImplicitValueInitExpr>(expr)) {
// This happens during template instantiation when we see an InitListExpr
// that we've already checked once.
assert(SemaRef.Context.hasSameType(expr->getType(), ElemType) &&
"found implicit initialization for the wrong type");
UpdateStructuredListElement(StructuredList, StructuredIndex, expr);
++Index;
return;
}
if (SemaRef.getLangOpts().CPlusPlus || isa<InitListExpr>(expr)) {
// C++ [dcl.init.aggr]p2:
// Each member is copy-initialized from the corresponding
// initializer-clause.
// FIXME: Better EqualLoc?
InitializationKind Kind =
InitializationKind::CreateCopy(expr->getBeginLoc(), SourceLocation());
// Vector elements can be initialized from other vectors, in which case
// we need an initialization entity with the type of the vector (and not a
// vector element!), since it initializes multiple vector elements.
auto TmpEntity =
(ElemType->isExtVectorType() && !Entity.getType()->isExtVectorType())
? InitializedEntity::InitializeTemporary(ElemType)
: Entity;
InitializationSequence Seq(SemaRef, TmpEntity, Kind, expr,
/*TopLevelOfInitList*/ true);
// C++14 [dcl.init.aggr]p13:
// If the assignment-expression can initialize a member, the member is
// initialized. Otherwise [...] brace elision is assumed
//
// Brace elision is never performed if the element is not an
// assignment-expression.
if (Seq || isa<InitListExpr>(expr)) {
if (!VerifyOnly) {
ExprResult Result = Seq.Perform(SemaRef, TmpEntity, Kind, expr);
if (Result.isInvalid())
hadError = true;
UpdateStructuredListElement(StructuredList, StructuredIndex,
Result.getAs<Expr>());
} else if (!Seq) {
hadError = true;
} else if (StructuredList) {
UpdateStructuredListElement(StructuredList, StructuredIndex,
getDummyInit());
}
++Index;
return;
}
// Fall through for subaggregate initialization
} else if (ElemType->isScalarType() || ElemType->isAtomicType()) {
// FIXME: Need to handle atomic aggregate types with implicit init lists.
return CheckScalarType(Entity, IList, ElemType, Index,
StructuredList, StructuredIndex);
} else if (const ArrayType *arrayType =
SemaRef.Context.getAsArrayType(ElemType)) {
// arrayType can be incomplete if we're initializing a flexible
// array member. There's nothing we can do with the completed
// type here, though.
if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) {
// FIXME: Should we do this checking in verify-only mode?
if (!VerifyOnly)
CheckStringInit(expr, ElemType, arrayType, SemaRef);
if (StructuredList)
UpdateStructuredListElement(StructuredList, StructuredIndex, expr);
++Index;
return;
}
// Fall through for subaggregate initialization.
} else {
assert((ElemType->isRecordType() || ElemType->isVectorType() ||
ElemType->isOpenCLSpecificType()) && "Unexpected type");
// C99 6.7.8p13:
//
// The initializer for a structure or union object that has
// automatic storage duration shall be either an initializer
// list as described below, or a single expression that has
// compatible structure or union type. In the latter case, the
// initial value of the object, including unnamed members, is
// that of the expression.
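//
// For example, in C (with automatic storage):
//   struct S { int a, b; };
//   struct S s1 = { 1, 2 };
//   struct S arr[1] = { s1 };  // arr[0] takes its value from the expression s1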
ExprResult ExprRes = expr;
if (SemaRef.CheckSingleAssignmentConstraints(
ElemType, ExprRes, !VerifyOnly) != Sema::Incompatible) {
if (ExprRes.isInvalid())
hadError = true;
else {
ExprRes = SemaRef.DefaultFunctionArrayLvalueConversion(ExprRes.get());
if (ExprRes.isInvalid())
hadError = true;
}
UpdateStructuredListElement(StructuredList, StructuredIndex,
ExprRes.getAs<Expr>());
++Index;
return;
}
ExprRes.get();
// Fall through for subaggregate initialization
}
// C++ [dcl.init.aggr]p12:
//
// [...] Otherwise, if the member is itself a non-empty
// subaggregate, brace elision is assumed and the initializer is
// considered for the initialization of the first member of
// the subaggregate.
// OpenCL vector initializer is handled elsewhere.
if ((!SemaRef.getLangOpts().OpenCL && ElemType->isVectorType()) ||
ElemType->isAggregateType()) {
CheckImplicitInitList(Entity, IList, ElemType, Index, StructuredList,
StructuredIndex);
++StructuredIndex;
// In C++20, brace elision is not permitted for a designated initializer.
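// For example, 'struct A { int x[2]; }; A a = { .x = 1 };' relies on brace
// elision for the array member; the fix-it wraps the initializer as '.x = {1}'.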
if (DirectlyDesignated && SemaRef.getLangOpts().CPlusPlus && !hadError) {
if (InOverloadResolution)
hadError = true;
if (!VerifyOnly) {
SemaRef.Diag(expr->getBeginLoc(),
diag::ext_designated_init_brace_elision)
<< expr->getSourceRange()
<< FixItHint::CreateInsertion(expr->getBeginLoc(), "{")
<< FixItHint::CreateInsertion(
SemaRef.getLocForEndOfToken(expr->getEndLoc()), "}");
}
}
} else {
if (!VerifyOnly) {
// We cannot initialize this element, so let PerformCopyInitialization
// produce the appropriate diagnostic. We already checked that this
// initialization will fail.
ExprResult Copy =
SemaRef.PerformCopyInitialization(Entity, SourceLocation(), expr,
/*TopLevelOfInitList=*/true);
(void)Copy;
assert(Copy.isInvalid() &&
"expected non-aggregate initialization to fail");
}
hadError = true;
++Index;
++StructuredIndex;
}
}
void InitListChecker::CheckComplexType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex) {
assert(Index == 0 && "Index in explicit init list must be zero");
// As an extension, clang supports complex initializers, which initialize
// a complex number component-wise. When an explicit initializer list for
// a complex number contains two initializers, this extension kicks in:
// it expects the initializer list to contain two elements convertible to
// the element type of the complex type. The first element initializes
// the real part, and the second element initializes the imaginary part.
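// For example, '_Complex float c = { 1.0f, 2.0f };' sets the real part to
// 1.0f and the imaginary part to 2.0f.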
if (IList->getNumInits() != 2)
return CheckScalarType(Entity, IList, DeclType, Index, StructuredList,
StructuredIndex);
// This is an extension in C. (The builtin _Complex type does not exist
// in the C++ standard.)
if (!SemaRef.getLangOpts().CPlusPlus && !VerifyOnly)
SemaRef.Diag(IList->getBeginLoc(), diag::ext_complex_component_init)
<< IList->getSourceRange();
// Initialize the complex number.
QualType elementType = DeclType->castAs<ComplexType>()->getElementType();
InitializedEntity ElementEntity =
InitializedEntity::InitializeElement(SemaRef.Context, 0, Entity);
for (unsigned i = 0; i < 2; ++i) {
ElementEntity.setElementIndex(Index);
CheckSubElementType(ElementEntity, IList, elementType, Index,
StructuredList, StructuredIndex);
}
}
void InitListChecker::CheckScalarType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex) {
if (Index >= IList->getNumInits()) {
if (!VerifyOnly) {
if (DeclType->isSizelessBuiltinType())
SemaRef.Diag(IList->getBeginLoc(),
SemaRef.getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_empty_sizeless_initializer
: diag::err_empty_sizeless_initializer)
<< DeclType << IList->getSourceRange();
else
SemaRef.Diag(IList->getBeginLoc(),
SemaRef.getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_empty_scalar_initializer
: diag::err_empty_scalar_initializer)
<< IList->getSourceRange();
}
hadError = !SemaRef.getLangOpts().CPlusPlus11;
++Index;
++StructuredIndex;
return;
}
Expr *expr = IList->getInit(Index);
if (InitListExpr *SubIList = dyn_cast<InitListExpr>(expr)) {
// FIXME: This is invalid, and accepting it causes overload resolution
// to pick the wrong overload in some corner cases.
if (!VerifyOnly)
SemaRef.Diag(SubIList->getBeginLoc(), diag::ext_many_braces_around_init)
<< DeclType->isSizelessBuiltinType() << SubIList->getSourceRange();
CheckScalarType(Entity, SubIList, DeclType, Index, StructuredList,
StructuredIndex);
return;
} else if (isa<DesignatedInitExpr>(expr)) {
if (!VerifyOnly)
SemaRef.Diag(expr->getBeginLoc(),
diag::err_designator_for_scalar_or_sizeless_init)
<< DeclType->isSizelessBuiltinType() << DeclType
<< expr->getSourceRange();
hadError = true;
++Index;
++StructuredIndex;
return;
}
ExprResult Result;
if (VerifyOnly) {
if (SemaRef.CanPerformCopyInitialization(Entity, expr))
Result = getDummyInit();
else
Result = ExprError();
} else {
Result =
SemaRef.PerformCopyInitialization(Entity, expr->getBeginLoc(), expr,
/*TopLevelOfInitList=*/true);
}
Expr *ResultExpr = nullptr;
if (Result.isInvalid())
hadError = true; // types weren't compatible.
else {
ResultExpr = Result.getAs<Expr>();
if (ResultExpr != expr && !VerifyOnly) {
// The type was promoted, update initializer list.
// FIXME: Why are we updating the syntactic init list?
IList->setInit(Index, ResultExpr);
}
}
UpdateStructuredListElement(StructuredList, StructuredIndex, ResultExpr);
++Index;
}
void InitListChecker::CheckReferenceType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex) {
if (Index >= IList->getNumInits()) {
// FIXME: It would be wonderful if we could point at the actual member. In
// general, it would be useful to pass location information down the stack,
// so that we know the location (or decl) of the "current object" being
// initialized.
if (!VerifyOnly)
SemaRef.Diag(IList->getBeginLoc(),
diag::err_init_reference_member_uninitialized)
<< DeclType << IList->getSourceRange();
hadError = true;
++Index;
++StructuredIndex;
return;
}
Expr *expr = IList->getInit(Index);
if (isa<InitListExpr>(expr) && !SemaRef.getLangOpts().CPlusPlus11) {
if (!VerifyOnly)
SemaRef.Diag(IList->getBeginLoc(), diag::err_init_non_aggr_init_list)
<< DeclType << IList->getSourceRange();
hadError = true;
++Index;
++StructuredIndex;
return;
}
ExprResult Result;
if (VerifyOnly) {
if (SemaRef.CanPerformCopyInitialization(Entity, expr))
Result = getDummyInit();
else
Result = ExprError();
} else {
Result =
SemaRef.PerformCopyInitialization(Entity, expr->getBeginLoc(), expr,
/*TopLevelOfInitList=*/true);
}
if (Result.isInvalid())
hadError = true;
expr = Result.getAs<Expr>();
// FIXME: Why are we updating the syntactic init list?
if (!VerifyOnly && expr)
IList->setInit(Index, expr);
UpdateStructuredListElement(StructuredList, StructuredIndex, expr);
++Index;
}
void InitListChecker::CheckVectorType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex) {
const VectorType *VT = DeclType->castAs<VectorType>();
unsigned maxElements = VT->getNumElements();
unsigned numEltsInit = 0;
QualType elementType = VT->getElementType();
if (Index >= IList->getNumInits()) {
// Make sure the element type can be value-initialized.
CheckEmptyInitializable(
InitializedEntity::InitializeElement(SemaRef.Context, 0, Entity),
IList->getEndLoc());
return;
}
if (!SemaRef.getLangOpts().OpenCL && !SemaRef.getLangOpts().HLSL) {
// If the initializing element is a vector, try to copy-initialize
// instead of breaking it apart (which is doomed to failure anyway).
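// For example, with 'typedef int v4 __attribute__((vector_size(16)));',
// 'v4 a = { 1, 2, 3, 4 }; v4 b = { a };' copy-initializes 'b' from 'a' as a
// whole rather than splitting 'a' into scalar elements.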
Expr *Init = IList->getInit(Index);
if (!isa<InitListExpr>(Init) && Init->getType()->isVectorType()) {
ExprResult Result;
if (VerifyOnly) {
if (SemaRef.CanPerformCopyInitialization(Entity, Init))
Result = getDummyInit();
else
Result = ExprError();
} else {
Result =
SemaRef.PerformCopyInitialization(Entity, Init->getBeginLoc(), Init,
/*TopLevelOfInitList=*/true);
}
Expr *ResultExpr = nullptr;
if (Result.isInvalid())
hadError = true; // types weren't compatible.
else {
ResultExpr = Result.getAs<Expr>();
if (ResultExpr != Init && !VerifyOnly) {
// The type was promoted, update initializer list.
// FIXME: Why are we updating the syntactic init list?
IList->setInit(Index, ResultExpr);
}
}
UpdateStructuredListElement(StructuredList, StructuredIndex, ResultExpr);
++Index;
return;
}
InitializedEntity ElementEntity =
InitializedEntity::InitializeElement(SemaRef.Context, 0, Entity);
for (unsigned i = 0; i < maxElements; ++i, ++numEltsInit) {
// Don't attempt to go past the end of the init list
if (Index >= IList->getNumInits()) {
CheckEmptyInitializable(ElementEntity, IList->getEndLoc());
break;
}
ElementEntity.setElementIndex(Index);
CheckSubElementType(ElementEntity, IList, elementType, Index,
StructuredList, StructuredIndex);
}
if (VerifyOnly)
return;
bool isBigEndian = SemaRef.Context.getTargetInfo().isBigEndian();
const VectorType *T = Entity.getType()->castAs<VectorType>();
if (isBigEndian && (T->getVectorKind() == VectorType::NeonVector ||
T->getVectorKind() == VectorType::NeonPolyVector)) {
// The ability to use vector initializer lists is a GNU vector extension
// and is unrelated to the NEON intrinsics in arm_neon.h. On little
// endian machines it works fine, however on big endian machines it
// exhibits surprising behaviour:
//
// uint32x2_t x = {42, 64};
// return vget_lane_u32(x, 0); // Will return 64.
//
// Because of this, explicitly call out that it is non-portable.
//
SemaRef.Diag(IList->getBeginLoc(),
diag::warn_neon_vector_initializer_non_portable);
const char *typeCode;
unsigned typeSize = SemaRef.Context.getTypeSize(elementType);
if (elementType->isFloatingType())
typeCode = "f";
else if (elementType->isSignedIntegerType())
typeCode = "s";
else if (elementType->isUnsignedIntegerType())
typeCode = "u";
else
llvm_unreachable("Invalid element type!");
SemaRef.Diag(IList->getBeginLoc(),
SemaRef.Context.getTypeSize(VT) > 64
? diag::note_neon_vector_initializer_non_portable_q
: diag::note_neon_vector_initializer_non_portable)
<< typeCode << typeSize;
}
return;
}
InitializedEntity ElementEntity =
InitializedEntity::InitializeElement(SemaRef.Context, 0, Entity);
// OpenCL and HLSL initializers allow vectors to be constructed from vectors.
for (unsigned i = 0; i < maxElements; ++i) {
// Don't attempt to go past the end of the init list
if (Index >= IList->getNumInits())
break;
ElementEntity.setElementIndex(Index);
QualType IType = IList->getInit(Index)->getType();
if (!IType->isVectorType()) {
CheckSubElementType(ElementEntity, IList, elementType, Index,
StructuredList, StructuredIndex);
++numEltsInit;
} else {
QualType VecType;
const VectorType *IVT = IType->castAs<VectorType>();
unsigned numIElts = IVT->getNumElements();
if (IType->isExtVectorType())
VecType = SemaRef.Context.getExtVectorType(elementType, numIElts);
else
VecType = SemaRef.Context.getVectorType(elementType, numIElts,
IVT->getVectorKind());
CheckSubElementType(ElementEntity, IList, VecType, Index,
StructuredList, StructuredIndex);
numEltsInit += numIElts;
}
}
// OpenCL and HLSL require all elements to be initialized.
if (numEltsInit != maxElements) {
if (!VerifyOnly)
SemaRef.Diag(IList->getBeginLoc(),
diag::err_vector_incorrect_num_initializers)
<< (numEltsInit < maxElements) << maxElements << numEltsInit;
hadError = true;
}
}
/// Check whether the type of a class element has an accessible destructor, and mark
/// it referenced. Returns true if we shouldn't form a reference to the
/// destructor.
///
/// Aggregate initialization requires a class element's destructor be
/// accessible per 11.6.1 [dcl.init.aggr]:
///
/// The destructor for each element of class type is potentially invoked
/// (15.4 [class.dtor]) from the context where the aggregate initialization
/// occurs.
static bool checkDestructorReference(QualType ElementType, SourceLocation Loc,
Sema &SemaRef) {
auto *CXXRD = ElementType->getAsCXXRecordDecl();
if (!CXXRD)
return false;
CXXDestructorDecl *Destructor = SemaRef.LookupDestructor(CXXRD);
SemaRef.CheckDestructorAccess(Loc, Destructor,
SemaRef.PDiag(diag::err_access_dtor_temp)
<< ElementType);
SemaRef.MarkFunctionReferenced(Loc, Destructor);
return SemaRef.DiagnoseUseOfDecl(Destructor, Loc);
}
void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
InitListExpr *IList, QualType &DeclType,
llvm::APSInt elementIndex,
bool SubobjectIsDesignatorContext,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex) {
const ArrayType *arrayType = SemaRef.Context.getAsArrayType(DeclType);
if (!VerifyOnly) {
if (checkDestructorReference(arrayType->getElementType(),
IList->getEndLoc(), SemaRef)) {
hadError = true;
return;
}
}
// Check for the special-case of initializing an array with a string.
if (Index < IList->getNumInits()) {
if (IsStringInit(IList->getInit(Index), arrayType, SemaRef.Context) ==
SIF_None) {
// We place the string literal directly into the resulting
// initializer list. This is the only place where the structure
// of the structured initializer list doesn't match exactly,
// because doing so would involve allocating one character
// constant for each string.
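// For example, in 'char buf[4] = { "abc" };' the structured list holds the
// string literal itself rather than four separate character constants.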
// FIXME: Should we do these checks in verify-only mode too?
if (!VerifyOnly)
CheckStringInit(IList->getInit(Index), DeclType, arrayType, SemaRef);
if (StructuredList) {
UpdateStructuredListElement(StructuredList, StructuredIndex,
IList->getInit(Index));
StructuredList->resizeInits(SemaRef.Context, StructuredIndex);
}
++Index;
return;
}
}
if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(arrayType)) {
// Check for VLAs; in standard C it would be possible to check this
// earlier, but I don't know where clang accepts VLAs (gcc accepts
// them in all sorts of strange places).
if (!VerifyOnly)
SemaRef.Diag(VAT->getSizeExpr()->getBeginLoc(),
diag::err_variable_object_no_init)
<< VAT->getSizeExpr()->getSourceRange();
hadError = true;
++Index;
++StructuredIndex;
return;
}
// We might know the maximum number of elements in advance.
llvm::APSInt maxElements(elementIndex.getBitWidth(),
elementIndex.isUnsigned());
bool maxElementsKnown = false;
if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(arrayType)) {
maxElements = CAT->getSize();
elementIndex = elementIndex.extOrTrunc(maxElements.getBitWidth());
elementIndex.setIsUnsigned(maxElements.isUnsigned());
maxElementsKnown = true;
}
QualType elementType = arrayType->getElementType();
while (Index < IList->getNumInits()) {
Expr *Init = IList->getInit(Index);
if (DesignatedInitExpr *DIE = dyn_cast<DesignatedInitExpr>(Init)) {
// If we're not the subobject that matches up with the '{' for
// the designator, we shouldn't be handling the
// designator. Return immediately.
if (!SubobjectIsDesignatorContext)
return;
// Handle this designated initializer. elementIndex will be
// updated to be the next array element we'll initialize.
if (CheckDesignatedInitializer(Entity, IList, DIE, 0,
DeclType, nullptr, &elementIndex, Index,
StructuredList, StructuredIndex, true,
false)) {
hadError = true;
continue;
}
if (elementIndex.getBitWidth() > maxElements.getBitWidth())
maxElements = maxElements.extend(elementIndex.getBitWidth());
else if (elementIndex.getBitWidth() < maxElements.getBitWidth())
elementIndex = elementIndex.extend(maxElements.getBitWidth());
elementIndex.setIsUnsigned(maxElements.isUnsigned());
// If the array is of incomplete type, keep track of the number of
// elements in the initializer.
if (!maxElementsKnown && elementIndex > maxElements)
maxElements = elementIndex;
continue;
}
// If we know the maximum number of elements, and we've already
// hit it, stop consuming elements in the initializer list.
if (maxElementsKnown && elementIndex == maxElements)
break;
InitializedEntity ElementEntity =
InitializedEntity::InitializeElement(SemaRef.Context, StructuredIndex,
Entity);
// Check this element.
CheckSubElementType(ElementEntity, IList, elementType, Index,
StructuredList, StructuredIndex);
++elementIndex;
// If the array is of incomplete type, keep track of the number of
// elements in the initializer.
if (!maxElementsKnown && elementIndex > maxElements)
maxElements = elementIndex;
}
if (!hadError && DeclType->isIncompleteArrayType() && !VerifyOnly) {
// If this is an incomplete array type, the actual type needs to
// be calculated here.
llvm::APSInt Zero(maxElements.getBitWidth(), maxElements.isUnsigned());
if (maxElements == Zero && !Entity.isVariableLengthArrayNew()) {
// Sizing an array implicitly to zero is not allowed by ISO C,
// but is supported by GNU.
SemaRef.Diag(IList->getBeginLoc(), diag::ext_typecheck_zero_array_size);
}
DeclType = SemaRef.Context.getConstantArrayType(
elementType, maxElements, nullptr, ArrayType::Normal, 0);
}
if (!hadError) {
// If there are any members of the array that get value-initialized, check
// that is possible. That happens if we know the bound and don't have
// enough elements, or if we're performing an array new with an unknown
// bound.
if ((maxElementsKnown && elementIndex < maxElements) ||
Entity.isVariableLengthArrayNew())
CheckEmptyInitializable(
InitializedEntity::InitializeElement(SemaRef.Context, 0, Entity),
IList->getEndLoc());
}
}
bool InitListChecker::CheckFlexibleArrayInit(const InitializedEntity &Entity,
Expr *InitExpr,
FieldDecl *Field,
bool TopLevelObject) {
// Handle GNU flexible array initializers.
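// For example (GNU extension, non-local variable at the top level):
//   struct S { int n; int data[]; };
//   struct S s = { 1, { 2, 3 } };  // flexible array init, diagnosed as extension
//   struct S t = { 1, {} };        // empty init, always just an extension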
unsigned FlexArrayDiag;
if (isa<InitListExpr>(InitExpr) &&
cast<InitListExpr>(InitExpr)->getNumInits() == 0) {
// Empty flexible array init always allowed as an extension
FlexArrayDiag = diag::ext_flexible_array_init;
} else if (!TopLevelObject) {
// Disallow flexible array init on non-top-level object
FlexArrayDiag = diag::err_flexible_array_init;
} else if (Entity.getKind() != InitializedEntity::EK_Variable) {
// Disallow flexible array init on anything which is not a variable.
FlexArrayDiag = diag::err_flexible_array_init;
} else if (cast<VarDecl>(Entity.getDecl())->hasLocalStorage()) {
// Disallow flexible array init on local variables.
FlexArrayDiag = diag::err_flexible_array_init;
} else {
// Allow other cases.
FlexArrayDiag = diag::ext_flexible_array_init;
}
if (!VerifyOnly) {
SemaRef.Diag(InitExpr->getBeginLoc(), FlexArrayDiag)
<< InitExpr->getBeginLoc();
SemaRef.Diag(Field->getLocation(), diag::note_flexible_array_member)
<< Field;
}
return FlexArrayDiag != diag::ext_flexible_array_init;
}
void InitListChecker::CheckStructUnionTypes(
const InitializedEntity &Entity, InitListExpr *IList, QualType DeclType,
CXXRecordDecl::base_class_range Bases, RecordDecl::field_iterator Field,
bool SubobjectIsDesignatorContext, unsigned &Index,
InitListExpr *StructuredList, unsigned &StructuredIndex,
bool TopLevelObject) {
RecordDecl *structDecl = DeclType->castAs<RecordType>()->getDecl();
// If the record is invalid, some of its members are invalid. To avoid
// confusion, we forgo checking the initializer for the entire record.
if (structDecl->isInvalidDecl()) {
// Assume it was supposed to consume a single initializer.
++Index;
hadError = true;
return;
}
if (DeclType->isUnionType() && IList->getNumInits() == 0) {
RecordDecl *RD = DeclType->castAs<RecordType>()->getDecl();
if (!VerifyOnly)
for (FieldDecl *FD : RD->fields()) {
QualType ET = SemaRef.Context.getBaseElementType(FD->getType());
if (checkDestructorReference(ET, IList->getEndLoc(), SemaRef)) {
hadError = true;
return;
}
}
// If there's a default initializer, use it.
if (isa<CXXRecordDecl>(RD) &&
cast<CXXRecordDecl>(RD)->hasInClassInitializer()) {
if (!StructuredList)
return;
for (RecordDecl::field_iterator FieldEnd = RD->field_end();
Field != FieldEnd; ++Field) {
if (Field->hasInClassInitializer()) {
StructuredList->setInitializedFieldInUnion(*Field);
// FIXME: Actually build a CXXDefaultInitExpr?
return;
}
}
}
// Value-initialize the first member of the union that isn't an unnamed
// bitfield.
for (RecordDecl::field_iterator FieldEnd = RD->field_end();
Field != FieldEnd; ++Field) {
if (!Field->isUnnamedBitfield()) {
CheckEmptyInitializable(
InitializedEntity::InitializeMember(*Field, &Entity),
IList->getEndLoc());
if (StructuredList)
StructuredList->setInitializedFieldInUnion(*Field);
break;
}
}
return;
}
bool InitializedSomething = false;
// If we have any base classes, they are initialized prior to the fields.
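// For illustration (a C++17 aggregate with bases):
//
//   struct B { int b; };
//   struct D : B { int d; };
//   D d = { {1}, 2 };   // {1} initializes the B base subobject, 2 initializes d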
for (auto &Base : Bases) {
Expr *Init = Index < IList->getNumInits() ? IList->getInit(Index) : nullptr;
// Designated inits always initialize fields, so if we see one, all
// remaining base classes have no explicit initializer.
if (Init && isa<DesignatedInitExpr>(Init))
Init = nullptr;
SourceLocation InitLoc = Init ? Init->getBeginLoc() : IList->getEndLoc();
InitializedEntity BaseEntity = InitializedEntity::InitializeBase(
SemaRef.Context, &Base, false, &Entity);
if (Init) {
CheckSubElementType(BaseEntity, IList, Base.getType(), Index,
StructuredList, StructuredIndex);
InitializedSomething = true;
} else {
CheckEmptyInitializable(BaseEntity, InitLoc);
}
if (!VerifyOnly)
if (checkDestructorReference(Base.getType(), InitLoc, SemaRef)) {
hadError = true;
return;
}
}
// If structDecl is a forward declaration, this loop won't do
// anything except look at designated initializers; that's okay,
// because an error should get printed out elsewhere. It might be
// worthwhile to skip over the rest of the initializer, though.
RecordDecl *RD = DeclType->castAs<RecordType>()->getDecl();
RecordDecl::field_iterator FieldEnd = RD->field_end();
size_t NumRecordDecls = llvm::count_if(RD->decls(), [&](const Decl *D) {
return isa<FieldDecl>(D) || isa<RecordDecl>(D);
});
bool CheckForMissingFields =
!IList->isIdiomaticZeroInitializer(SemaRef.getLangOpts());
bool HasDesignatedInit = false;
while (Index < IList->getNumInits()) {
Expr *Init = IList->getInit(Index);
SourceLocation InitLoc = Init->getBeginLoc();
if (DesignatedInitExpr *DIE = dyn_cast<DesignatedInitExpr>(Init)) {
// If we're not the subobject that matches up with the '{' for
// the designator, we shouldn't be handling the
// designator. Return immediately.
if (!SubobjectIsDesignatorContext)
return;
HasDesignatedInit = true;
// Handle this designated initializer. Field will be updated to
// the next field that we'll be initializing.
if (CheckDesignatedInitializer(Entity, IList, DIE, 0,
DeclType, &Field, nullptr, Index,
StructuredList, StructuredIndex,
true, TopLevelObject))
hadError = true;
else if (!VerifyOnly) {
// Find the field named by the designated initializer.
RecordDecl::field_iterator F = RD->field_begin();
while (std::next(F) != Field)
++F;
QualType ET = SemaRef.Context.getBaseElementType(F->getType());
if (checkDestructorReference(ET, InitLoc, SemaRef)) {
hadError = true;
return;
}
}
InitializedSomething = true;
// Disable check for missing fields when designators are used.
// This matches gcc behaviour.
CheckForMissingFields = false;
continue;
}
// Check if this is an initializer of one of the following forms:
//
// struct foo f = {};
// struct foo g = {0};
//
// These are okay for randomized structures. [C99 6.7.8p19]
//
// Also, if there is only one element in the structure, we allow something
// like this, because it's really not randomized in the traditional sense.
//
// struct foo h = {bar};
auto IsZeroInitializer = [&](const Expr *I) {
if (IList->getNumInits() == 1) {
if (NumRecordDecls == 1)
return true;
if (const auto *IL = dyn_cast<IntegerLiteral>(I))
return IL->getValue().isZero();
}
return false;
};
// Don't allow non-designated initializers on randomized structures.
if (RD->isRandomized() && !IsZeroInitializer(Init)) {
if (!VerifyOnly)
SemaRef.Diag(InitLoc, diag::err_non_designated_init_used);
hadError = true;
break;
}
if (Field == FieldEnd) {
// We've run out of fields. We're done.
break;
}
// We've already initialized a member of a union. We're done.
if (InitializedSomething && DeclType->isUnionType())
break;
// If we've hit the flexible array member at the end, we're done.
if (Field->getType()->isIncompleteArrayType())
break;
if (Field->isUnnamedBitfield()) {
// Don't initialize unnamed bitfields, e.g. "int : 20;"
++Field;
continue;
}
// Make sure we can use this declaration.
bool InvalidUse;
if (VerifyOnly)
InvalidUse = !SemaRef.CanUseDecl(*Field, TreatUnavailableAsInvalid);
else
InvalidUse = SemaRef.DiagnoseUseOfDecl(
*Field, IList->getInit(Index)->getBeginLoc());
if (InvalidUse) {
++Index;
++Field;
hadError = true;
continue;
}
if (!VerifyOnly) {
QualType ET = SemaRef.Context.getBaseElementType(Field->getType());
if (checkDestructorReference(ET, InitLoc, SemaRef)) {
hadError = true;
return;
}
}
InitializedEntity MemberEntity =
InitializedEntity::InitializeMember(*Field, &Entity);
CheckSubElementType(MemberEntity, IList, Field->getType(), Index,
StructuredList, StructuredIndex);
InitializedSomething = true;
if (DeclType->isUnionType() && StructuredList) {
// Initialize the first field within the union.
StructuredList->setInitializedFieldInUnion(*Field);
}
++Field;
}
// Emit warnings for missing struct field initializers.
if (!VerifyOnly && InitializedSomething && CheckForMissingFields &&
Field != FieldEnd && !Field->getType()->isIncompleteArrayType() &&
!DeclType->isUnionType()) {
// It is possible we have one or more unnamed bitfields remaining.
// Find the first (if any) named field and emit a warning.
for (RecordDecl::field_iterator it = Field, end = RD->field_end();
it != end; ++it) {
if (!it->isUnnamedBitfield() && !it->hasInClassInitializer()) {
SemaRef.Diag(IList->getSourceRange().getEnd(),
diag::warn_missing_field_initializers) << *it;
break;
}
}
}
// Check that any remaining fields can be value-initialized if we're not
// building a structured list. (If we are, we'll check this later.)
if (!StructuredList && Field != FieldEnd && !DeclType->isUnionType() &&
!Field->getType()->isIncompleteArrayType()) {
for (; Field != FieldEnd && !hadError; ++Field) {
if (!Field->isUnnamedBitfield() && !Field->hasInClassInitializer())
CheckEmptyInitializable(
InitializedEntity::InitializeMember(*Field, &Entity),
IList->getEndLoc());
}
}
// Check that the types of the remaining fields have accessible destructors.
if (!VerifyOnly) {
// If the initializer expression has a designated initializer, check the
// elements for which a designated initializer is not provided too.
RecordDecl::field_iterator I = HasDesignatedInit ? RD->field_begin()
: Field;
for (RecordDecl::field_iterator E = RD->field_end(); I != E; ++I) {
QualType ET = SemaRef.Context.getBaseElementType(I->getType());
if (checkDestructorReference(ET, IList->getEndLoc(), SemaRef)) {
hadError = true;
return;
}
}
}
if (Field == FieldEnd || !Field->getType()->isIncompleteArrayType() ||
Index >= IList->getNumInits())
return;
if (CheckFlexibleArrayInit(Entity, IList->getInit(Index), *Field,
TopLevelObject)) {
hadError = true;
++Index;
return;
}
InitializedEntity MemberEntity =
InitializedEntity::InitializeMember(*Field, &Entity);
if (isa<InitListExpr>(IList->getInit(Index)))
CheckSubElementType(MemberEntity, IList, Field->getType(), Index,
StructuredList, StructuredIndex);
else
CheckImplicitInitList(MemberEntity, IList, Field->getType(), Index,
StructuredList, StructuredIndex);
}
/// Expand a field designator that refers to a member of an
/// anonymous struct or union into a series of field designators that
/// refer to the field within the appropriate subobject.
///
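/// For example:
/// \code
///   struct S { struct { int x; }; };
///   struct S s = { .x = 1 };  // '.x' is expanded through the anonymous struct
/// \endcode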
static void ExpandAnonymousFieldDesignator(Sema &SemaRef,
DesignatedInitExpr *DIE,
unsigned DesigIdx,
IndirectFieldDecl *IndirectField) {
typedef DesignatedInitExpr::Designator Designator;
// Build the replacement designators.
SmallVector<Designator, 4> Replacements;
for (IndirectFieldDecl::chain_iterator PI = IndirectField->chain_begin(),
PE = IndirectField->chain_end(); PI != PE; ++PI) {
if (PI + 1 == PE)
Replacements.push_back(Designator((IdentifierInfo *)nullptr,
DIE->getDesignator(DesigIdx)->getDotLoc(),
DIE->getDesignator(DesigIdx)->getFieldLoc()));
else
Replacements.push_back(Designator((IdentifierInfo *)nullptr,
SourceLocation(), SourceLocation()));
assert(isa<FieldDecl>(*PI));
Replacements.back().setField(cast<FieldDecl>(*PI));
}
// Expand the current designator into the set of replacement
// designators, so we have a full subobject path down to where the
// member of the anonymous struct/union is actually stored.
DIE->ExpandDesignator(SemaRef.Context, DesigIdx, &Replacements[0],
&Replacements[0] + Replacements.size());
}
static DesignatedInitExpr *CloneDesignatedInitExpr(Sema &SemaRef,
DesignatedInitExpr *DIE) {
unsigned NumIndexExprs = DIE->getNumSubExprs() - 1;
SmallVector<Expr*, 4> IndexExprs(NumIndexExprs);
for (unsigned I = 0; I < NumIndexExprs; ++I)
IndexExprs[I] = DIE->getSubExpr(I + 1);
return DesignatedInitExpr::Create(SemaRef.Context, DIE->designators(),
IndexExprs,
DIE->getEqualOrColonLoc(),
DIE->usesGNUSyntax(), DIE->getInit());
}
namespace {
// Callback to only accept typo corrections that are for field members of
// the given struct or union.
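// For illustration, this enables recovery such as:
//
//   struct S { int count; };
//   struct S s = { .cuont = 1 };  // typo correction suggests '.count'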
class FieldInitializerValidatorCCC final : public CorrectionCandidateCallback {
public:
explicit FieldInitializerValidatorCCC(RecordDecl *RD)
: Record(RD) {}
bool ValidateCandidate(const TypoCorrection &candidate) override {
FieldDecl *FD = candidate.getCorrectionDeclAs<FieldDecl>();
return FD && FD->getDeclContext()->getRedeclContext()->Equals(Record);
}
std::unique_ptr<CorrectionCandidateCallback> clone() override {
return std::make_unique<FieldInitializerValidatorCCC>(*this);
}
private:
RecordDecl *Record;
};
} // end anonymous namespace
/// Check the well-formedness of a C99 designated initializer.
///
/// Determines whether the designated initializer @p DIE, which
/// resides at the given @p Index within the initializer list @p
/// IList, is well-formed for a current object of type @p CurrentObjectType
/// (C99 6.7.8). The actual subobject that this designator refers to
/// within the current subobject is returned in either
/// @p NextField or @p NextElementIndex (whichever is appropriate).
///
/// @param IList The initializer list in which this designated
/// initializer occurs.
///
/// @param DIE The designated initializer expression.
///
/// @param DesigIdx The index of the current designator.
///
/// @param CurrentObjectType The type of the "current object" (C99 6.7.8p17),
/// into which the designation in @p DIE should refer.
///
/// @param NextField If non-NULL and the first designator in @p DIE is
/// a field, this will be set to the field declaration corresponding
/// to the field named by the designator. On input, this is expected to be
/// the next field that would be initialized in the absence of designation,
/// if the complete object being initialized is a struct.
///
/// @param NextElementIndex If non-NULL and the first designator in @p
/// DIE is an array designator or GNU array-range designator, this
/// will be set to the last index initialized by this designator.
///
/// @param Index Index into @p IList where the designated initializer
/// @p DIE occurs.
///
/// @param StructuredList The initializer list expression that
/// describes all of the subobject initializers in the order they'll
/// actually be initialized.
///
/// @returns true if there was an error, false otherwise.
bool
InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
InitListExpr *IList,
DesignatedInitExpr *DIE,
unsigned DesigIdx,
QualType &CurrentObjectType,
RecordDecl::field_iterator *NextField,
llvm::APSInt *NextElementIndex,
unsigned &Index,
InitListExpr *StructuredList,
unsigned &StructuredIndex,
bool FinishSubobjectInit,
bool TopLevelObject) {
if (DesigIdx == DIE->size()) {
// C++20 designated initialization can result in direct-list-initialization
// of the designated subobject. This is the only way that we can end up
// performing direct initialization as part of aggregate initialization, so
// it needs special handling.
if (DIE->isDirectInit()) {
Expr *Init = DIE->getInit();
assert(isa<InitListExpr>(Init) &&
"designator result in direct non-list initialization?");
InitializationKind Kind = InitializationKind::CreateDirectList(
DIE->getBeginLoc(), Init->getBeginLoc(), Init->getEndLoc());
InitializationSequence Seq(SemaRef, Entity, Kind, Init,
/*TopLevelOfInitList*/ true);
if (StructuredList) {
ExprResult Result = VerifyOnly
? getDummyInit()
: Seq.Perform(SemaRef, Entity, Kind, Init);
UpdateStructuredListElement(StructuredList, StructuredIndex,
Result.get());
}
++Index;
return !Seq;
}
// Check the actual initialization for the designated object type.
bool prevHadError = hadError;
// Temporarily remove the designator expression from the
// initializer list that the child calls see, so that we don't try
// to re-process the designator.
unsigned OldIndex = Index;
IList->setInit(OldIndex, DIE->getInit());
CheckSubElementType(Entity, IList, CurrentObjectType, Index, StructuredList,
StructuredIndex, /*DirectlyDesignated=*/true);
// Restore the designated initializer expression in the syntactic
// form of the initializer list.
if (IList->getInit(OldIndex) != DIE->getInit())
DIE->setInit(IList->getInit(OldIndex));
IList->setInit(OldIndex, DIE);
return hadError && !prevHadError;
}
DesignatedInitExpr::Designator *D = DIE->getDesignator(DesigIdx);
bool IsFirstDesignator = (DesigIdx == 0);
if (IsFirstDesignator ? FullyStructuredList : StructuredList) {
// Determine the structural initializer list that corresponds to the
// current subobject.
if (IsFirstDesignator)
StructuredList = FullyStructuredList;
else {
Expr *ExistingInit = StructuredIndex < StructuredList->getNumInits() ?
StructuredList->getInit(StructuredIndex) : nullptr;
if (!ExistingInit && StructuredList->hasArrayFiller())
ExistingInit = StructuredList->getArrayFiller();
if (!ExistingInit)
StructuredList = getStructuredSubobjectInit(
IList, Index, CurrentObjectType, StructuredList, StructuredIndex,
SourceRange(D->getBeginLoc(), DIE->getEndLoc()));
else if (InitListExpr *Result = dyn_cast<InitListExpr>(ExistingInit))
StructuredList = Result;
else {
// We are creating an initializer list that initializes the
// subobjects of the current object, but there was already an
// initialization that completely initialized the current
// subobject, e.g., by a compound literal:
//
// struct X { int a, b; };
// struct X xs[] = { [0] = (struct X) { 1, 2 }, [0].b = 3 };
//
// Here, xs[0].a == 1 and xs[0].b == 3, since the second,
// designated initializer re-initializes only the subobject [0].b of
// the current object.
diagnoseInitOverride(ExistingInit,
SourceRange(D->getBeginLoc(), DIE->getEndLoc()),
/*FullyOverwritten=*/false);
if (!VerifyOnly) {
if (DesignatedInitUpdateExpr *E =
dyn_cast<DesignatedInitUpdateExpr>(ExistingInit))
StructuredList = E->getUpdater();
else {
DesignatedInitUpdateExpr *DIUE = new (SemaRef.Context)
DesignatedInitUpdateExpr(SemaRef.Context, D->getBeginLoc(),
ExistingInit, DIE->getEndLoc());
StructuredList->updateInit(SemaRef.Context, StructuredIndex, DIUE);
StructuredList = DIUE->getUpdater();
}
} else {
// We don't need to track the structured representation of a
// designated init update of an already-fully-initialized object in
// verify-only mode. The only reason we would need the structure is
// to determine where the uninitialized "holes" are, and in this
// case, we know there aren't any and we can't introduce any.
StructuredList = nullptr;
}
}
}
}
if (D->isFieldDesignator()) {
// C99 6.7.8p7:
//
// If a designator has the form
//
// . identifier
//
// then the current object (defined below) shall have
// structure or union type and the identifier shall be the
// name of a member of that type.
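//
// For example:
//
//   struct Point { int x, y; };
//   struct Point p = { .y = 2 };   // '.y' names the member 'y' of Point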
const RecordType *RT = CurrentObjectType->getAs<RecordType>();
if (!RT) {
SourceLocation Loc = D->getDotLoc();
if (Loc.isInvalid())
Loc = D->getFieldLoc();
if (!VerifyOnly)
SemaRef.Diag(Loc, diag::err_field_designator_non_aggr)
<< SemaRef.getLangOpts().CPlusPlus << CurrentObjectType;
++Index;
return true;
}
FieldDecl *KnownField = D->getField();
if (!KnownField) {
IdentifierInfo *FieldName = D->getFieldName();
DeclContext::lookup_result Lookup = RT->getDecl()->lookup(FieldName);
for (NamedDecl *ND : Lookup) {
if (auto *FD = dyn_cast<FieldDecl>(ND)) {
KnownField = FD;
break;
}
if (auto *IFD = dyn_cast<IndirectFieldDecl>(ND)) {
// In verify mode, don't modify the original.
if (VerifyOnly)
DIE = CloneDesignatedInitExpr(SemaRef, DIE);
ExpandAnonymousFieldDesignator(SemaRef, DIE, DesigIdx, IFD);
D = DIE->getDesignator(DesigIdx);
KnownField = cast<FieldDecl>(*IFD->chain_begin());
break;
}
}
if (!KnownField) {
if (VerifyOnly) {
++Index;
return true; // No typo correction when just trying this out.
}
// Name lookup found something, but it wasn't a field.
if (!Lookup.empty()) {
SemaRef.Diag(D->getFieldLoc(), diag::err_field_designator_nonfield)
<< FieldName;
SemaRef.Diag(Lookup.front()->getLocation(),
diag::note_field_designator_found);
++Index;
return true;
}
// Name lookup didn't find anything.
// Determine whether this was a typo for another field name.
FieldInitializerValidatorCCC CCC(RT->getDecl());
if (TypoCorrection Corrected = SemaRef.CorrectTypo(
DeclarationNameInfo(FieldName, D->getFieldLoc()),
Sema::LookupMemberName, /*Scope=*/nullptr, /*SS=*/nullptr, CCC,
Sema::CTK_ErrorRecovery, RT->getDecl())) {
SemaRef.diagnoseTypo(
Corrected,
SemaRef.PDiag(diag::err_field_designator_unknown_suggest)
<< FieldName << CurrentObjectType);
KnownField = Corrected.getCorrectionDeclAs<FieldDecl>();
hadError = true;
} else {
// Typo correction didn't find anything.
SemaRef.Diag(D->getFieldLoc(), diag::err_field_designator_unknown)
<< FieldName << CurrentObjectType;
++Index;
return true;
}
}
}
unsigned NumBases = 0;
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
NumBases = CXXRD->getNumBases();
unsigned FieldIndex = NumBases;
for (auto *FI : RT->getDecl()->fields()) {
if (FI->isUnnamedBitfield())
continue;
if (declaresSameEntity(KnownField, FI)) {
KnownField = FI;
break;
}
++FieldIndex;
}
RecordDecl::field_iterator Field =
RecordDecl::field_iterator(DeclContext::decl_iterator(KnownField));
// All of the fields of a union are located at the same place in
// the initializer list.
if (RT->getDecl()->isUnion()) {
FieldIndex = 0;
if (StructuredList) {
FieldDecl *CurrentField = StructuredList->getInitializedFieldInUnion();
if (CurrentField && !declaresSameEntity(CurrentField, *Field)) {
assert(StructuredList->getNumInits() == 1
&& "A union should never have more than one initializer!");
Expr *ExistingInit = StructuredList->getInit(0);
if (ExistingInit) {
// We're about to throw away an initializer, so emit a warning.
diagnoseInitOverride(
ExistingInit, SourceRange(D->getBeginLoc(), DIE->getEndLoc()));
}
// Remove the existing initializer.
StructuredList->resizeInits(SemaRef.Context, 0);
StructuredList->setInitializedFieldInUnion(nullptr);
}
StructuredList->setInitializedFieldInUnion(*Field);
}
}
// Make sure we can use this declaration.
bool InvalidUse;
if (VerifyOnly)
InvalidUse = !SemaRef.CanUseDecl(*Field, TreatUnavailableAsInvalid);
else
InvalidUse = SemaRef.DiagnoseUseOfDecl(*Field, D->getFieldLoc());
if (InvalidUse) {
++Index;
return true;
}
// C++20 [dcl.init.list]p3:
// The ordered identifiers in the designators of the designated-
// initializer-list shall form a subsequence of the ordered identifiers
// in the direct non-static data members of T.
//
// Note that this is not a condition on forming the aggregate
// initialization, only on actually performing initialization,
// so it is not checked in VerifyOnly mode.
//
// FIXME: This is the only reordering diagnostic we produce, and it only
// catches cases where we have a top-level field designator that jumps
// backwards. This is the only such case that is reachable in an
// otherwise-valid C++20 program, so is the only case that's required for
// conformance, but for consistency, we should diagnose all the other
// cases where a designator takes us backwards too.
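//
// For illustration, the one reachable case:
//
//   struct A { int x, y; };
//   A a{.y = 1, .x = 2};   // '.x' appears after '.y'; diagnosed by the check below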
if (IsFirstDesignator && !VerifyOnly && SemaRef.getLangOpts().CPlusPlus &&
NextField &&
(*NextField == RT->getDecl()->field_end() ||
(*NextField)->getFieldIndex() > Field->getFieldIndex() + 1)) {
// Find the field that we just initialized.
FieldDecl *PrevField = nullptr;
for (auto FI = RT->getDecl()->field_begin();
FI != RT->getDecl()->field_end(); ++FI) {
if (FI->isUnnamedBitfield())
continue;
if (*NextField != RT->getDecl()->field_end() &&
declaresSameEntity(*FI, **NextField))
break;
PrevField = *FI;
}
if (PrevField &&
PrevField->getFieldIndex() > KnownField->getFieldIndex()) {
SemaRef.Diag(DIE->getBeginLoc(), diag::ext_designated_init_reordered)
<< KnownField << PrevField << DIE->getSourceRange();
unsigned OldIndex = NumBases + PrevField->getFieldIndex();
if (StructuredList && OldIndex <= StructuredList->getNumInits()) {
if (Expr *PrevInit = StructuredList->getInit(OldIndex)) {
SemaRef.Diag(PrevInit->getBeginLoc(),
diag::note_previous_field_init)
<< PrevField << PrevInit->getSourceRange();
}
}
}
}
// Update the designator with the field declaration.
if (!VerifyOnly)
D->setField(*Field);
// Make sure that our non-designated initializer list has space
// for a subobject corresponding to this field.
if (StructuredList && FieldIndex >= StructuredList->getNumInits())
StructuredList->resizeInits(SemaRef.Context, FieldIndex + 1);
// This designator names a flexible array member.
if (Field->getType()->isIncompleteArrayType()) {
bool Invalid = false;
if ((DesigIdx + 1) != DIE->size()) {
// We can't designate an object within the flexible array
// member (because GCC doesn't allow it).
if (!VerifyOnly) {
DesignatedInitExpr::Designator *NextD
= DIE->getDesignator(DesigIdx + 1);
SemaRef.Diag(NextD->getBeginLoc(),
diag::err_designator_into_flexible_array_member)
<< SourceRange(NextD->getBeginLoc(), DIE->getEndLoc());
SemaRef.Diag(Field->getLocation(), diag::note_flexible_array_member)
<< *Field;
}
Invalid = true;
}
if (!hadError && !isa<InitListExpr>(DIE->getInit()) &&
!isa<StringLiteral>(DIE->getInit())) {
// The initializer is not an initializer list.
if (!VerifyOnly) {
SemaRef.Diag(DIE->getInit()->getBeginLoc(),
diag::err_flexible_array_init_needs_braces)
<< DIE->getInit()->getSourceRange();
SemaRef.Diag(Field->getLocation(), diag::note_flexible_array_member)
<< *Field;
}
Invalid = true;
}
// Check GNU flexible array initializer.
if (!Invalid && CheckFlexibleArrayInit(Entity, DIE->getInit(), *Field,
TopLevelObject))
Invalid = true;
if (Invalid) {
++Index;
return true;
}
// Initialize the array.
bool prevHadError = hadError;
unsigned newStructuredIndex = FieldIndex;
unsigned OldIndex = Index;
IList->setInit(Index, DIE->getInit());
InitializedEntity MemberEntity =
InitializedEntity::InitializeMember(*Field, &Entity);
CheckSubElementType(MemberEntity, IList, Field->getType(), Index,
StructuredList, newStructuredIndex);
IList->setInit(OldIndex, DIE);
if (hadError && !prevHadError) {
++Field;
++FieldIndex;
if (NextField)
*NextField = Field;
StructuredIndex = FieldIndex;
return true;
}
} else {
// Recurse to check later designated subobjects.
QualType FieldType = Field->getType();
unsigned newStructuredIndex = FieldIndex;
InitializedEntity MemberEntity =
InitializedEntity::InitializeMember(*Field, &Entity);
if (CheckDesignatedInitializer(MemberEntity, IList, DIE, DesigIdx + 1,
FieldType, nullptr, nullptr, Index,
StructuredList, newStructuredIndex,
FinishSubobjectInit, false))
return true;
}
// Find the position of the next field to be initialized in this
// subobject.
++Field;
++FieldIndex;
// If this is the first designator, our caller will continue checking
// the rest of this struct/class/union subobject.
if (IsFirstDesignator) {
if (NextField)
*NextField = Field;
StructuredIndex = FieldIndex;
return false;
}
if (!FinishSubobjectInit)
return false;
// We've already initialized something in the union; we're done.
if (RT->getDecl()->isUnion())
return hadError;
// Check the remaining fields within this class/struct/union subobject.
bool prevHadError = hadError;
auto NoBases =
CXXRecordDecl::base_class_range(CXXRecordDecl::base_class_iterator(),
CXXRecordDecl::base_class_iterator());
CheckStructUnionTypes(Entity, IList, CurrentObjectType, NoBases, Field,
false, Index, StructuredList, FieldIndex);
return hadError && !prevHadError;
}
// C99 6.7.8p6:
//
// If a designator has the form
//
// [ constant-expression ]
//
// then the current object (defined below) shall have array
// type and the expression shall be an integer constant
// expression. If the array is of unknown size, any
// nonnegative value is valid.
//
// Additionally, cope with the GNU extension that permits
// designators of the form
//
// [ constant-expression ... constant-expression ]
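//
// For example:
//
//   int a[10] = { [2] = 1 };         // array designator
//   int b[10] = { [3 ... 5] = 7 };   // GNU array-range designator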
const ArrayType *AT = SemaRef.Context.getAsArrayType(CurrentObjectType);
if (!AT) {
if (!VerifyOnly)
SemaRef.Diag(D->getLBracketLoc(), diag::err_array_designator_non_array)
<< CurrentObjectType;
++Index;
return true;
}
Expr *IndexExpr = nullptr;
llvm::APSInt DesignatedStartIndex, DesignatedEndIndex;
if (D->isArrayDesignator()) {
IndexExpr = DIE->getArrayIndex(*D);
DesignatedStartIndex = IndexExpr->EvaluateKnownConstInt(SemaRef.Context);
DesignatedEndIndex = DesignatedStartIndex;
} else {
assert(D->isArrayRangeDesignator() && "Need array-range designator");
DesignatedStartIndex =
DIE->getArrayRangeStart(*D)->EvaluateKnownConstInt(SemaRef.Context);
DesignatedEndIndex =
DIE->getArrayRangeEnd(*D)->EvaluateKnownConstInt(SemaRef.Context);
IndexExpr = DIE->getArrayRangeEnd(*D);
// Codegen can't handle evaluating array range designators that have side
// effects, because we replicate the AST value for each initialized element.
// As such, set the sawArrayRangeDesignator() bit if we initialize multiple
// elements with something that has a side effect, so codegen can emit an
// "error unsupported" error instead of miscompiling the app.
if (DesignatedStartIndex.getZExtValue()!=DesignatedEndIndex.getZExtValue()&&
DIE->getInit()->HasSideEffects(SemaRef.Context) && !VerifyOnly)
FullyStructuredList->sawArrayRangeDesignator();
}
if (isa<ConstantArrayType>(AT)) {
llvm::APSInt MaxElements(cast<ConstantArrayType>(AT)->getSize(), false);
DesignatedStartIndex
= DesignatedStartIndex.extOrTrunc(MaxElements.getBitWidth());
DesignatedStartIndex.setIsUnsigned(MaxElements.isUnsigned());
DesignatedEndIndex
= DesignatedEndIndex.extOrTrunc(MaxElements.getBitWidth());
DesignatedEndIndex.setIsUnsigned(MaxElements.isUnsigned());
if (DesignatedEndIndex >= MaxElements) {
if (!VerifyOnly)
SemaRef.Diag(IndexExpr->getBeginLoc(),
diag::err_array_designator_too_large)
<< toString(DesignatedEndIndex, 10) << toString(MaxElements, 10)
<< IndexExpr->getSourceRange();
++Index;
return true;
}
} else {
unsigned DesignatedIndexBitWidth =
ConstantArrayType::getMaxSizeBits(SemaRef.Context);
DesignatedStartIndex =
DesignatedStartIndex.extOrTrunc(DesignatedIndexBitWidth);
DesignatedEndIndex =
DesignatedEndIndex.extOrTrunc(DesignatedIndexBitWidth);
DesignatedStartIndex.setIsUnsigned(true);
DesignatedEndIndex.setIsUnsigned(true);
}
bool IsStringLiteralInitUpdate =
StructuredList && StructuredList->isStringLiteralInit();
if (IsStringLiteralInitUpdate && VerifyOnly) {
// We're just verifying an update to a string literal init. We don't need
// to split the string up into individual characters to do that.
StructuredList = nullptr;
} else if (IsStringLiteralInitUpdate) {
// We're modifying a string literal init; we have to decompose the string
// so we can modify the individual characters.
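//
// For example (a later designator overriding a single character):
//
//   struct S { char s[4]; };
//   struct S x = { .s = "abc", .s[0] = 'X' };   // "abc" must be decomposed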
ASTContext &Context = SemaRef.Context;
Expr *SubExpr = StructuredList->getInit(0)->IgnoreParenImpCasts();
// Compute the character type
QualType CharTy = AT->getElementType();
// Compute the type of the integer literals.
QualType PromotedCharTy = CharTy;
if (Context.isPromotableIntegerType(CharTy))
PromotedCharTy = Context.getPromotedIntegerType(CharTy);
unsigned PromotedCharTyWidth = Context.getTypeSize(PromotedCharTy);
if (StringLiteral *SL = dyn_cast<StringLiteral>(SubExpr)) {
// Get the length of the string.
uint64_t StrLen = SL->getLength();
if (cast<ConstantArrayType>(AT)->getSize().ult(StrLen))
StrLen = cast<ConstantArrayType>(AT)->getSize().getZExtValue();
StructuredList->resizeInits(Context, StrLen);
// Build a literal for each character in the string, and put them into
// the init list.
for (unsigned i = 0, e = StrLen; i != e; ++i) {
llvm::APInt CodeUnit(PromotedCharTyWidth, SL->getCodeUnit(i));
Expr *Init = new (Context) IntegerLiteral(
Context, CodeUnit, PromotedCharTy, SubExpr->getExprLoc());
if (CharTy != PromotedCharTy)
Init = ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast,
Init, nullptr, VK_PRValue,
FPOptionsOverride());
StructuredList->updateInit(Context, i, Init);
}
} else {
ObjCEncodeExpr *E = cast<ObjCEncodeExpr>(SubExpr);
std::string Str;
Context.getObjCEncodingForType(E->getEncodedType(), Str);
// Get the length of the string.
uint64_t StrLen = Str.size();
if (cast<ConstantArrayType>(AT)->getSize().ult(StrLen))
StrLen = cast<ConstantArrayType>(AT)->getSize().getZExtValue();
StructuredList->resizeInits(Context, StrLen);
// Build a literal for each character in the string, and put them into
// the init list.
for (unsigned i = 0, e = StrLen; i != e; ++i) {
llvm::APInt CodeUnit(PromotedCharTyWidth, Str[i]);
Expr *Init = new (Context) IntegerLiteral(
Context, CodeUnit, PromotedCharTy, SubExpr->getExprLoc());
if (CharTy != PromotedCharTy)
Init = ImplicitCastExpr::Create(Context, CharTy, CK_IntegralCast,
Init, nullptr, VK_PRValue,
FPOptionsOverride());
StructuredList->updateInit(Context, i, Init);
}
}
}
// Make sure that our non-designated initializer list has space
// for a subobject corresponding to this array element.
if (StructuredList &&
DesignatedEndIndex.getZExtValue() >= StructuredList->getNumInits())
StructuredList->resizeInits(SemaRef.Context,
DesignatedEndIndex.getZExtValue() + 1);
// Repeatedly perform subobject initializations in the range
// [DesignatedStartIndex, DesignatedEndIndex].
// Move to the next designator
unsigned ElementIndex = DesignatedStartIndex.getZExtValue();
unsigned OldIndex = Index;
InitializedEntity ElementEntity =
InitializedEntity::InitializeElement(SemaRef.Context, 0, Entity);
while (DesignatedStartIndex <= DesignatedEndIndex) {
// Recurse to check later designated subobjects.
QualType ElementType = AT->getElementType();
Index = OldIndex;
ElementEntity.setElementIndex(ElementIndex);
if (CheckDesignatedInitializer(
ElementEntity, IList, DIE, DesigIdx + 1, ElementType, nullptr,
nullptr, Index, StructuredList, ElementIndex,
FinishSubobjectInit && (DesignatedStartIndex == DesignatedEndIndex),
false))
return true;
// Move to the next index in the array that we'll be initializing.
++DesignatedStartIndex;
ElementIndex = DesignatedStartIndex.getZExtValue();
}
// If this is the first designator, our caller will continue checking
// the rest of this array subobject.
if (IsFirstDesignator) {
if (NextElementIndex)
*NextElementIndex = DesignatedStartIndex;
StructuredIndex = ElementIndex;
return false;
}
if (!FinishSubobjectInit)
return false;
// Check the remaining elements within this array subobject.
bool prevHadError = hadError;
CheckArrayType(Entity, IList, CurrentObjectType, DesignatedStartIndex,
/*SubobjectIsDesignatorContext=*/false, Index,
StructuredList, ElementIndex);
return hadError && !prevHadError;
}
// Get the structured initializer list for a subobject of type
// @p CurrentObjectType.
InitListExpr *
InitListChecker::getStructuredSubobjectInit(InitListExpr *IList, unsigned Index,
QualType CurrentObjectType,
InitListExpr *StructuredList,
unsigned StructuredIndex,
SourceRange InitRange,
bool IsFullyOverwritten) {
if (!StructuredList)
return nullptr;
Expr *ExistingInit = nullptr;
if (StructuredIndex < StructuredList->getNumInits())
ExistingInit = StructuredList->getInit(StructuredIndex);
if (InitListExpr *Result = dyn_cast_or_null<InitListExpr>(ExistingInit))
// There might have already been initializers for subobjects of the current
// object, but a subsequent initializer list will overwrite the entirety
// of the current object (see DR 253 and C99 6.7.8p21), e.g.,
//
// struct P { char x[6]; };
// struct P l = { .x[2] = 'x', .x = { [0] = 'f' } };
//
// The first designated initializer is ignored, and l.x is just "f".
if (!IsFullyOverwritten)
return Result;
if (ExistingInit) {
// We are creating an initializer list that initializes the
// subobjects of the current object, but there was already an
// initialization that completely initialized the current
// subobject:
//
// struct X { int a, b; };
// struct X xs[] = { [0] = { 1, 2 }, [0].b = 3 };
//
// Here, xs[0].a == 1 and xs[0].b == 3, since the second,
// designated initializer overwrites the [0].b initializer
// from the prior initialization.
//
// When the existing initializer is an expression rather than an
// initializer list, we cannot decompose and update it in this way.
// For example:
//
// struct X xs[] = { [0] = (struct X) { 1, 2 }, [0].b = 3 };
//
// This case is handled by CheckDesignatedInitializer.
diagnoseInitOverride(ExistingInit, InitRange);
}
unsigned ExpectedNumInits = 0;
if (Index < IList->getNumInits()) {
if (auto *Init = dyn_cast_or_null<InitListExpr>(IList->getInit(Index)))
ExpectedNumInits = Init->getNumInits();
else
ExpectedNumInits = IList->getNumInits() - Index;
}
InitListExpr *Result =
createInitListExpr(CurrentObjectType, InitRange, ExpectedNumInits);
// Link this new initializer list into the structured initializer
// lists.
StructuredList->updateInit(SemaRef.Context, StructuredIndex, Result);
return Result;
}
InitListExpr *
InitListChecker::createInitListExpr(QualType CurrentObjectType,
SourceRange InitRange,
unsigned ExpectedNumInits) {
InitListExpr *Result = new (SemaRef.Context) InitListExpr(
SemaRef.Context, InitRange.getBegin(), std::nullopt, InitRange.getEnd());
QualType ResultType = CurrentObjectType;
if (!ResultType->isArrayType())
ResultType = ResultType.getNonLValueExprType(SemaRef.Context);
Result->setType(ResultType);
// Pre-allocate storage for the structured initializer list.
unsigned NumElements = 0;
if (const ArrayType *AType
= SemaRef.Context.getAsArrayType(CurrentObjectType)) {
if (const ConstantArrayType *CAType = dyn_cast<ConstantArrayType>(AType)) {
NumElements = CAType->getSize().getZExtValue();
// Simple heuristic so that we don't allocate a very large
// initializer with many empty entries at the end.
if (NumElements > ExpectedNumInits)
NumElements = 0;
}
} else if (const VectorType *VType = CurrentObjectType->getAs<VectorType>()) {
NumElements = VType->getNumElements();
} else if (CurrentObjectType->isRecordType()) {
NumElements = numStructUnionElements(CurrentObjectType);
}
Result->reserveInits(SemaRef.Context, NumElements);
return Result;
}
/// Update the initializer at index @p StructuredIndex within the
/// structured initializer list to the value @p expr.
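/// For illustration:
/// \code
///   int a[2] = { [0] = 1, [0] = 2 };  // the second entry overrides the first
/// \endcode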
void InitListChecker::UpdateStructuredListElement(InitListExpr *StructuredList,
unsigned &StructuredIndex,
Expr *expr) {
// No structured initializer list to update
if (!StructuredList)
return;
if (Expr *PrevInit = StructuredList->updateInit(SemaRef.Context,
StructuredIndex, expr)) {
// This initializer overwrites a previous initializer.
// No need to diagnose when `expr` is nullptr because a more relevant
// diagnostic has already been issued and this diagnostic is potentially
// noise.
if (expr)
diagnoseInitOverride(PrevInit, expr->getSourceRange());
}
++StructuredIndex;
}
/// Determine whether we can perform aggregate initialization for the purposes
/// of overload resolution.
bool Sema::CanPerformAggregateInitializationForOverloadResolution(
const InitializedEntity &Entity, InitListExpr *From) {
QualType Type = Entity.getType();
InitListChecker Check(*this, Entity, From, Type, /*VerifyOnly=*/true,
/*TreatUnavailableAsInvalid=*/false,
/*InOverloadResolution=*/true);
return !Check.HadError();
}
/// Check that the given Index expression is a valid array designator
/// value. This is essentially just a wrapper around
/// VerifyIntegerConstantExpression that also checks for negative values
/// and produces a reasonable diagnostic if there is a
/// failure. Returns the index expression, possibly with an implicit cast
/// added, on success. If everything went okay, Value will receive the
/// value of the constant expression.
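/// For example:
/// \code
///   int a[4] = { [-1] = 0 };  // rejected: negative array designator value
/// \endcode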
static ExprResult
CheckArrayDesignatorExpr(Sema &S, Expr *Index, llvm::APSInt &Value) {
SourceLocation Loc = Index->getBeginLoc();
// Make sure this is an integer constant expression.
ExprResult Result =
S.VerifyIntegerConstantExpression(Index, &Value, Sema::AllowFold);
if (Result.isInvalid())
return Result;
if (Value.isSigned() && Value.isNegative())
return S.Diag(Loc, diag::err_array_designator_negative)
<< toString(Value, 10) << Index->getSourceRange();
Value.setIsUnsigned(true);
return Result;
}
ExprResult Sema::ActOnDesignatedInitializer(Designation &Desig,
SourceLocation EqualOrColonLoc,
bool GNUSyntax,
ExprResult Init) {
typedef DesignatedInitExpr::Designator ASTDesignator;
bool Invalid = false;
SmallVector<ASTDesignator, 32> Designators;
SmallVector<Expr *, 32> InitExpressions;
// Build designators and check array designator expressions.
for (unsigned Idx = 0; Idx < Desig.getNumDesignators(); ++Idx) {
const Designator &D = Desig.getDesignator(Idx);
switch (D.getKind()) {
case Designator::FieldDesignator:
Designators.push_back(ASTDesignator(D.getField(), D.getDotLoc(),
D.getFieldLoc()));
break;
case Designator::ArrayDesignator: {
Expr *Index = static_cast<Expr *>(D.getArrayIndex());
llvm::APSInt IndexValue;
if (!Index->isTypeDependent() && !Index->isValueDependent())
Index = CheckArrayDesignatorExpr(*this, Index, IndexValue).get();
if (!Index)
Invalid = true;
else {
Designators.push_back(ASTDesignator(InitExpressions.size(),
D.getLBracketLoc(),
D.getRBracketLoc()));
InitExpressions.push_back(Index);
}
break;
}
case Designator::ArrayRangeDesignator: {
Expr *StartIndex = static_cast<Expr *>(D.getArrayRangeStart());
Expr *EndIndex = static_cast<Expr *>(D.getArrayRangeEnd());
llvm::APSInt StartValue;
llvm::APSInt EndValue;
bool StartDependent = StartIndex->isTypeDependent() ||
StartIndex->isValueDependent();
bool EndDependent = EndIndex->isTypeDependent() ||
EndIndex->isValueDependent();
if (!StartDependent)
StartIndex =
CheckArrayDesignatorExpr(*this, StartIndex, StartValue).get();
if (!EndDependent)
EndIndex = CheckArrayDesignatorExpr(*this, EndIndex, EndValue).get();
if (!StartIndex || !EndIndex)
Invalid = true;
else {
// Make sure we're comparing values with the same bit width.
if (StartDependent || EndDependent) {
// Nothing to compute.
} else if (StartValue.getBitWidth() > EndValue.getBitWidth())
EndValue = EndValue.extend(StartValue.getBitWidth());
else if (StartValue.getBitWidth() < EndValue.getBitWidth())
StartValue = StartValue.extend(EndValue.getBitWidth());
if (!StartDependent && !EndDependent && EndValue < StartValue) {
Diag(D.getEllipsisLoc(), diag::err_array_designator_empty_range)
<< toString(StartValue, 10) << toString(EndValue, 10)
<< StartIndex->getSourceRange() << EndIndex->getSourceRange();
Invalid = true;
} else {
Designators.push_back(ASTDesignator(InitExpressions.size(),
D.getLBracketLoc(),
D.getEllipsisLoc(),
D.getRBracketLoc()));
InitExpressions.push_back(StartIndex);
InitExpressions.push_back(EndIndex);
}
}
break;
}
}
}
if (Invalid || Init.isInvalid())
return ExprError();
// Clear out the expressions within the designation.
Desig.ClearExprs(*this);
return DesignatedInitExpr::Create(Context, Designators, InitExpressions,
EqualOrColonLoc, GNUSyntax,
Init.getAs<Expr>());
}
//===----------------------------------------------------------------------===//
// Initialization entity
//===----------------------------------------------------------------------===//
InitializedEntity::InitializedEntity(ASTContext &Context, unsigned Index,
const InitializedEntity &Parent)
: Parent(&Parent), Index(Index)
{
if (const ArrayType *AT = Context.getAsArrayType(Parent.getType())) {
Kind = EK_ArrayElement;
Type = AT->getElementType();
} else if (const VectorType *VT = Parent.getType()->getAs<VectorType>()) {
Kind = EK_VectorElement;
Type = VT->getElementType();
} else {
const ComplexType *CT = Parent.getType()->getAs<ComplexType>();
assert(CT && "Unexpected type");
Kind = EK_ComplexElement;
Type = CT->getElementType();
}
}
InitializedEntity
InitializedEntity::InitializeBase(ASTContext &Context,
const CXXBaseSpecifier *Base,
bool IsInheritedVirtualBase,
const InitializedEntity *Parent) {
InitializedEntity Result;
Result.Kind = EK_Base;
Result.Parent = Parent;
Result.Base = {Base, IsInheritedVirtualBase};
Result.Type = Base->getType();
return Result;
}
DeclarationName InitializedEntity::getName() const {
switch (getKind()) {
case EK_Parameter:
case EK_Parameter_CF_Audited: {
ParmVarDecl *D = Parameter.getPointer();
return (D ? D->getDeclName() : DeclarationName());
}
case EK_Variable:
case EK_Member:
+ case EK_ParenAggInitMember:
case EK_Binding:
case EK_TemplateParameter:
return Variable.VariableOrMember->getDeclName();
case EK_LambdaCapture:
return DeclarationName(Capture.VarID);
case EK_Result:
case EK_StmtExprResult:
case EK_Exception:
case EK_New:
case EK_Temporary:
case EK_Base:
case EK_Delegating:
case EK_ArrayElement:
case EK_VectorElement:
case EK_ComplexElement:
case EK_BlockElement:
case EK_LambdaToBlockConversionBlockElement:
case EK_CompoundLiteralInit:
case EK_RelatedResult:
return DeclarationName();
}
llvm_unreachable("Invalid EntityKind!");
}
ValueDecl *InitializedEntity::getDecl() const {
switch (getKind()) {
case EK_Variable:
case EK_Member:
+ case EK_ParenAggInitMember:
case EK_Binding:
case EK_TemplateParameter:
return Variable.VariableOrMember;
case EK_Parameter:
case EK_Parameter_CF_Audited:
return Parameter.getPointer();
case EK_Result:
case EK_StmtExprResult:
case EK_Exception:
case EK_New:
case EK_Temporary:
case EK_Base:
case EK_Delegating:
case EK_ArrayElement:
case EK_VectorElement:
case EK_ComplexElement:
case EK_BlockElement:
case EK_LambdaToBlockConversionBlockElement:
case EK_LambdaCapture:
case EK_CompoundLiteralInit:
case EK_RelatedResult:
return nullptr;
}
llvm_unreachable("Invalid EntityKind!");
}
bool InitializedEntity::allowsNRVO() const {
switch (getKind()) {
case EK_Result:
case EK_Exception:
return LocAndNRVO.NRVO;
case EK_StmtExprResult:
case EK_Variable:
case EK_Parameter:
case EK_Parameter_CF_Audited:
case EK_TemplateParameter:
case EK_Member:
+ case EK_ParenAggInitMember:
case EK_Binding:
case EK_New:
case EK_Temporary:
case EK_CompoundLiteralInit:
case EK_Base:
case EK_Delegating:
case EK_ArrayElement:
case EK_VectorElement:
case EK_ComplexElement:
case EK_BlockElement:
case EK_LambdaToBlockConversionBlockElement:
case EK_LambdaCapture:
case EK_RelatedResult:
break;
}
return false;
}
unsigned InitializedEntity::dumpImpl(raw_ostream &OS) const {
assert(getParent() != this);
unsigned Depth = getParent() ? getParent()->dumpImpl(OS) : 0;
for (unsigned I = 0; I != Depth; ++I)
OS << "`-";
switch (getKind()) {
case EK_Variable: OS << "Variable"; break;
case EK_Parameter: OS << "Parameter"; break;
case EK_Parameter_CF_Audited: OS << "CF audited function Parameter";
break;
case EK_TemplateParameter: OS << "TemplateParameter"; break;
case EK_Result: OS << "Result"; break;
case EK_StmtExprResult: OS << "StmtExprResult"; break;
case EK_Exception: OS << "Exception"; break;
- case EK_Member: OS << "Member"; break;
+ case EK_Member:
+ case EK_ParenAggInitMember:
+ OS << "Member";
+ break;
case EK_Binding: OS << "Binding"; break;
case EK_New: OS << "New"; break;
case EK_Temporary: OS << "Temporary"; break;
case EK_CompoundLiteralInit: OS << "CompoundLiteral";break;
case EK_RelatedResult: OS << "RelatedResult"; break;
case EK_Base: OS << "Base"; break;
case EK_Delegating: OS << "Delegating"; break;
case EK_ArrayElement: OS << "ArrayElement " << Index; break;
case EK_VectorElement: OS << "VectorElement " << Index; break;
case EK_ComplexElement: OS << "ComplexElement " << Index; break;
case EK_BlockElement: OS << "Block"; break;
case EK_LambdaToBlockConversionBlockElement:
OS << "Block (lambda)";
break;
case EK_LambdaCapture:
OS << "LambdaCapture ";
OS << DeclarationName(Capture.VarID);
break;
}
if (auto *D = getDecl()) {
OS << " ";
D->printQualifiedName(OS);
}
OS << " '" << getType() << "'\n";
return Depth + 1;
}
LLVM_DUMP_METHOD void InitializedEntity::dump() const {
dumpImpl(llvm::errs());
}
//===----------------------------------------------------------------------===//
// Initialization sequence
//===----------------------------------------------------------------------===//
void InitializationSequence::Step::Destroy() {
switch (Kind) {
case SK_ResolveAddressOfOverloadedFunction:
case SK_CastDerivedToBasePRValue:
case SK_CastDerivedToBaseXValue:
case SK_CastDerivedToBaseLValue:
case SK_BindReference:
case SK_BindReferenceToTemporary:
case SK_FinalCopy:
case SK_ExtraneousCopyToTemporary:
case SK_UserConversion:
case SK_QualificationConversionPRValue:
case SK_QualificationConversionXValue:
case SK_QualificationConversionLValue:
case SK_FunctionReferenceConversion:
case SK_AtomicConversion:
case SK_ListInitialization:
case SK_UnwrapInitList:
case SK_RewrapInitList:
case SK_ConstructorInitialization:
case SK_ConstructorInitializationFromList:
case SK_ZeroInitialization:
case SK_CAssignment:
case SK_StringInit:
case SK_ObjCObjectConversion:
case SK_ArrayLoopIndex:
case SK_ArrayLoopInit:
case SK_ArrayInit:
case SK_GNUArrayInit:
case SK_ParenthesizedArrayInit:
case SK_PassByIndirectCopyRestore:
case SK_PassByIndirectRestore:
case SK_ProduceObjCObject:
case SK_StdInitializerList:
case SK_StdInitializerListConstructorCall:
case SK_OCLSamplerInit:
case SK_OCLZeroOpaqueType:
case SK_ParenthesizedListInit:
break;
case SK_ConversionSequence:
case SK_ConversionSequenceNoNarrowing:
delete ICS;
}
}
bool InitializationSequence::isDirectReferenceBinding() const {
// There can be some lvalue adjustments after the SK_BindReference step.
for (const Step &S : llvm::reverse(Steps)) {
if (S.Kind == SK_BindReference)
return true;
if (S.Kind == SK_BindReferenceToTemporary)
return false;
}
return false;
}
bool InitializationSequence::isAmbiguous() const {
if (!Failed())
return false;
switch (getFailureKind()) {
case FK_TooManyInitsForReference:
case FK_ParenthesizedListInitForReference:
case FK_ArrayNeedsInitList:
case FK_ArrayNeedsInitListOrStringLiteral:
case FK_ArrayNeedsInitListOrWideStringLiteral:
case FK_NarrowStringIntoWideCharArray:
case FK_WideStringIntoCharArray:
case FK_IncompatWideStringIntoWideChar:
case FK_PlainStringIntoUTF8Char:
case FK_UTF8StringIntoPlainChar:
case FK_AddressOfOverloadFailed: // FIXME: Could do better
case FK_NonConstLValueReferenceBindingToTemporary:
case FK_NonConstLValueReferenceBindingToBitfield:
case FK_NonConstLValueReferenceBindingToVectorElement:
case FK_NonConstLValueReferenceBindingToMatrixElement:
case FK_NonConstLValueReferenceBindingToUnrelated:
case FK_RValueReferenceBindingToLValue:
case FK_ReferenceAddrspaceMismatchTemporary:
case FK_ReferenceInitDropsQualifiers:
case FK_ReferenceInitFailed:
case FK_ConversionFailed:
case FK_ConversionFromPropertyFailed:
case FK_TooManyInitsForScalar:
case FK_ParenthesizedListInitForScalar:
case FK_ReferenceBindingToInitList:
case FK_InitListBadDestinationType:
case FK_DefaultInitOfConst:
case FK_Incomplete:
case FK_ArrayTypeMismatch:
case FK_NonConstantArrayInit:
case FK_ListInitializationFailed:
case FK_VariableLengthArrayHasInitializer:
case FK_PlaceholderType:
case FK_ExplicitConstructor:
case FK_AddressOfUnaddressableFunction:
case FK_ParenthesizedListInitFailed:
return false;
case FK_ReferenceInitOverloadFailed:
case FK_UserConversionOverloadFailed:
case FK_ConstructorOverloadFailed:
case FK_ListConstructorOverloadFailed:
return FailedOverloadResult == OR_Ambiguous;
}
llvm_unreachable("Invalid EntityKind!");
}
bool InitializationSequence::isConstructorInitialization() const {
return !Steps.empty() && Steps.back().Kind == SK_ConstructorInitialization;
}
void
InitializationSequence
::AddAddressOverloadResolutionStep(FunctionDecl *Function,
DeclAccessPair Found,
bool HadMultipleCandidates) {
Step S;
S.Kind = SK_ResolveAddressOfOverloadedFunction;
S.Type = Function->getType();
S.Function.HadMultipleCandidates = HadMultipleCandidates;
S.Function.Function = Function;
S.Function.FoundDecl = Found;
Steps.push_back(S);
}
void InitializationSequence::AddDerivedToBaseCastStep(QualType BaseType,
ExprValueKind VK) {
Step S;
switch (VK) {
case VK_PRValue:
S.Kind = SK_CastDerivedToBasePRValue;
break;
case VK_XValue: S.Kind = SK_CastDerivedToBaseXValue; break;
case VK_LValue: S.Kind = SK_CastDerivedToBaseLValue; break;
}
S.Type = BaseType;
Steps.push_back(S);
}
void InitializationSequence::AddReferenceBindingStep(QualType T,
bool BindingTemporary) {
Step S;
S.Kind = BindingTemporary? SK_BindReferenceToTemporary : SK_BindReference;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddFinalCopy(QualType T) {
Step S;
S.Kind = SK_FinalCopy;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddExtraneousCopyToTemporary(QualType T) {
Step S;
S.Kind = SK_ExtraneousCopyToTemporary;
S.Type = T;
Steps.push_back(S);
}
void
InitializationSequence::AddUserConversionStep(FunctionDecl *Function,
DeclAccessPair FoundDecl,
QualType T,
bool HadMultipleCandidates) {
Step S;
S.Kind = SK_UserConversion;
S.Type = T;
S.Function.HadMultipleCandidates = HadMultipleCandidates;
S.Function.Function = Function;
S.Function.FoundDecl = FoundDecl;
Steps.push_back(S);
}
void InitializationSequence::AddQualificationConversionStep(QualType Ty,
ExprValueKind VK) {
Step S;
S.Kind = SK_QualificationConversionPRValue; // work around a gcc warning
switch (VK) {
case VK_PRValue:
S.Kind = SK_QualificationConversionPRValue;
break;
case VK_XValue:
S.Kind = SK_QualificationConversionXValue;
break;
case VK_LValue:
S.Kind = SK_QualificationConversionLValue;
break;
}
S.Type = Ty;
Steps.push_back(S);
}
void InitializationSequence::AddFunctionReferenceConversionStep(QualType Ty) {
Step S;
S.Kind = SK_FunctionReferenceConversion;
S.Type = Ty;
Steps.push_back(S);
}
void InitializationSequence::AddAtomicConversionStep(QualType Ty) {
Step S;
S.Kind = SK_AtomicConversion;
S.Type = Ty;
Steps.push_back(S);
}
void InitializationSequence::AddConversionSequenceStep(
const ImplicitConversionSequence &ICS, QualType T,
bool TopLevelOfInitList) {
Step S;
S.Kind = TopLevelOfInitList ? SK_ConversionSequenceNoNarrowing
: SK_ConversionSequence;
S.Type = T;
S.ICS = new ImplicitConversionSequence(ICS);
Steps.push_back(S);
}
void InitializationSequence::AddListInitializationStep(QualType T) {
Step S;
S.Kind = SK_ListInitialization;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddConstructorInitializationStep(
DeclAccessPair FoundDecl, CXXConstructorDecl *Constructor, QualType T,
bool HadMultipleCandidates, bool FromInitList, bool AsInitList) {
Step S;
S.Kind = FromInitList ? AsInitList ? SK_StdInitializerListConstructorCall
: SK_ConstructorInitializationFromList
: SK_ConstructorInitialization;
S.Type = T;
S.Function.HadMultipleCandidates = HadMultipleCandidates;
S.Function.Function = Constructor;
S.Function.FoundDecl = FoundDecl;
Steps.push_back(S);
}
void InitializationSequence::AddZeroInitializationStep(QualType T) {
Step S;
S.Kind = SK_ZeroInitialization;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddCAssignmentStep(QualType T) {
Step S;
S.Kind = SK_CAssignment;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddStringInitStep(QualType T) {
Step S;
S.Kind = SK_StringInit;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddObjCObjectConversionStep(QualType T) {
Step S;
S.Kind = SK_ObjCObjectConversion;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddArrayInitStep(QualType T, bool IsGNUExtension) {
Step S;
S.Kind = IsGNUExtension ? SK_GNUArrayInit : SK_ArrayInit;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddArrayInitLoopStep(QualType T, QualType EltT) {
Step S;
S.Kind = SK_ArrayLoopIndex;
S.Type = EltT;
Steps.insert(Steps.begin(), S);
S.Kind = SK_ArrayLoopInit;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddParenthesizedArrayInitStep(QualType T) {
Step S;
S.Kind = SK_ParenthesizedArrayInit;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddPassByIndirectCopyRestoreStep(QualType type,
bool shouldCopy) {
Step s;
s.Kind = (shouldCopy ? SK_PassByIndirectCopyRestore
: SK_PassByIndirectRestore);
s.Type = type;
Steps.push_back(s);
}
void InitializationSequence::AddProduceObjCObjectStep(QualType T) {
Step S;
S.Kind = SK_ProduceObjCObject;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddStdInitializerListConstructionStep(QualType T) {
Step S;
S.Kind = SK_StdInitializerList;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddOCLSamplerInitStep(QualType T) {
Step S;
S.Kind = SK_OCLSamplerInit;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddOCLZeroOpaqueTypeStep(QualType T) {
Step S;
S.Kind = SK_OCLZeroOpaqueType;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::AddParenthesizedListInitStep(QualType T) {
Step S;
S.Kind = SK_ParenthesizedListInit;
S.Type = T;
Steps.push_back(S);
}
void InitializationSequence::RewrapReferenceInitList(QualType T,
InitListExpr *Syntactic) {
assert(Syntactic->getNumInits() == 1 &&
"Can only rewrap trivial init lists.");
Step S;
S.Kind = SK_UnwrapInitList;
S.Type = Syntactic->getInit(0)->getType();
Steps.insert(Steps.begin(), S);
S.Kind = SK_RewrapInitList;
S.Type = T;
S.WrappingSyntacticList = Syntactic;
Steps.push_back(S);
}
void InitializationSequence::SetOverloadFailure(FailureKind Failure,
OverloadingResult Result) {
setSequenceKind(FailedSequence);
this->Failure = Failure;
this->FailedOverloadResult = Result;
}
//===----------------------------------------------------------------------===//
// Attempt initialization
//===----------------------------------------------------------------------===//
/// Tries to add a zero initializer. Returns true if that worked.
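/// For illustration, the kind of fix-it this backs (a sketch):
/// \code
///   const int x;       // error: default initialization of a const variable
///   const int x = 0;   // suggested recovery
/// \endcode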
static bool
maybeRecoverWithZeroInitialization(Sema &S, InitializationSequence &Sequence,
const InitializedEntity &Entity) {
if (Entity.getKind() != InitializedEntity::EK_Variable)
return false;
VarDecl *VD = cast<VarDecl>(Entity.getDecl());
if (VD->getInit() || VD->getEndLoc().isMacroID())
return false;
QualType VariableTy = VD->getType().getCanonicalType();
SourceLocation Loc = S.getLocForEndOfToken(VD->getEndLoc());
std::string Init = S.getFixItZeroInitializerForType(VariableTy, Loc);
if (!Init.empty()) {
Sequence.AddZeroInitializationStep(Entity.getType());
Sequence.SetZeroInitializationFixit(Init, Loc);
return true;
}
return false;
}
static void MaybeProduceObjCObject(Sema &S,
InitializationSequence &Sequence,
const InitializedEntity &Entity) {
if (!S.getLangOpts().ObjCAutoRefCount) return;
/// When initializing a parameter, produce the value if it's marked
/// __attribute__((ns_consumed)).
if (Entity.isParameterKind()) {
if (!Entity.isParameterConsumed())
return;
assert(Entity.getType()->isObjCRetainableType() &&
"consuming an object of unretainable type?");
Sequence.AddProduceObjCObjectStep(Entity.getType());
/// When initializing a return value, if the return type is a
/// retainable type, then returns need to immediately retain the
/// object. If an autorelease is required, it will be done at the
/// last instant.
} else if (Entity.getKind() == InitializedEntity::EK_Result ||
Entity.getKind() == InitializedEntity::EK_StmtExprResult) {
if (!Entity.getType()->isObjCRetainableType())
return;
Sequence.AddProduceObjCObjectStep(Entity.getType());
}
}
static void TryListInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitListExpr *InitList,
InitializationSequence &Sequence,
bool TreatUnavailableAsInvalid);
/// When initializing from init list via constructor, handle
/// initialization of an object of type std::initializer_list<T>.
///
/// \return true if we have handled initialization of an object of type
/// std::initializer_list<T>, false otherwise.
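/// For example:
/// \code
///   std::initializer_list<int> il = {1, 2, 3};  // backed by a temporary 'const int[3]'
/// \endcode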
static bool TryInitializerListConstruction(Sema &S,
InitListExpr *List,
QualType DestType,
InitializationSequence &Sequence,
bool TreatUnavailableAsInvalid) {
QualType E;
if (!S.isStdInitializerList(DestType, &E))
return false;
if (!S.isCompleteType(List->getExprLoc(), E)) {
Sequence.setIncompleteTypeFailure(E);
return true;
}
// Try initializing a temporary array from the init list.
QualType ArrayType = S.Context.getConstantArrayType(
E.withConst(),
llvm::APInt(S.Context.getTypeSize(S.Context.getSizeType()),
List->getNumInits()),
nullptr, clang::ArrayType::Normal, 0);
InitializedEntity HiddenArray =
InitializedEntity::InitializeTemporary(ArrayType);
InitializationKind Kind = InitializationKind::CreateDirectList(
List->getExprLoc(), List->getBeginLoc(), List->getEndLoc());
TryListInitialization(S, HiddenArray, Kind, List, Sequence,
TreatUnavailableAsInvalid);
if (Sequence)
Sequence.AddStdInitializerListConstructionStep(DestType);
return true;
}
/// Determine if the constructor has the signature of a copy or move
/// constructor for the type T of the class in which it was found. That is,
/// determine if its first parameter is of type T or reference to (possibly
/// cv-qualified) T.
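///
/// For example (illustrative):
/// \code
///   struct T {
///     T(const T &); // first parameter is reference to cv T -> true
///     T(T &&);      // likewise true
///     T(int);       // false
///   };
/// \endcode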
static bool hasCopyOrMoveCtorParam(ASTContext &Ctx,
const ConstructorInfo &Info) {
if (Info.Constructor->getNumParams() == 0)
return false;
QualType ParmT =
Info.Constructor->getParamDecl(0)->getType().getNonReferenceType();
QualType ClassT =
Ctx.getRecordType(cast<CXXRecordDecl>(Info.FoundDecl->getDeclContext()));
return Ctx.hasSameUnqualifiedType(ParmT, ClassT);
}
static OverloadingResult
ResolveConstructorOverload(Sema &S, SourceLocation DeclLoc,
MultiExprArg Args,
OverloadCandidateSet &CandidateSet,
QualType DestType,
DeclContext::lookup_result Ctors,
OverloadCandidateSet::iterator &Best,
bool CopyInitializing, bool AllowExplicit,
bool OnlyListConstructors, bool IsListInit,
bool SecondStepOfCopyInit = false) {
CandidateSet.clear(OverloadCandidateSet::CSK_InitByConstructor);
CandidateSet.setDestAS(DestType.getQualifiers().getAddressSpace());
for (NamedDecl *D : Ctors) {
auto Info = getConstructorInfo(D);
if (!Info.Constructor || Info.Constructor->isInvalidDecl())
continue;
if (OnlyListConstructors && !S.isInitListConstructor(Info.Constructor))
continue;
// C++11 [over.best.ics]p4:
// ... and the constructor or user-defined conversion function is a
// candidate by
// - 13.3.1.3, when the argument is the temporary in the second step
// of a class copy-initialization, or
// - 13.3.1.4, 13.3.1.5, or 13.3.1.6 (in all cases), [not handled here]
// - the second phase of 13.3.1.7 when the initializer list has exactly
// one element that is itself an initializer list, and the target is
// the first parameter of a constructor of class X, and the conversion
// is to X or reference to (possibly cv-qualified) X,
// user-defined conversion sequences are not considered.
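// For example (illustrative), in
//   struct X { X(const X &); /*...*/ };
//   X x = {{/*...*/}};
// the inner braced list is matched against X's copy constructor without
// considering user-defined conversion sequences.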
bool SuppressUserConversions =
SecondStepOfCopyInit ||
(IsListInit && Args.size() == 1 && isa<InitListExpr>(Args[0]) &&
hasCopyOrMoveCtorParam(S.Context, Info));
if (Info.ConstructorTmpl)
S.AddTemplateOverloadCandidate(
Info.ConstructorTmpl, Info.FoundDecl,
/*ExplicitArgs*/ nullptr, Args, CandidateSet, SuppressUserConversions,
/*PartialOverloading=*/false, AllowExplicit);
else {
// C++ [over.match.copy]p1:
// - When initializing a temporary to be bound to the first parameter
// of a constructor [for type T] that takes a reference to possibly
// cv-qualified T as its first argument, called with a single
// argument in the context of direct-initialization, explicit
// conversion functions are also considered.
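// For example (illustrative):
//   struct T { T(const T &); };
//   struct U { explicit operator T() const; };
//   T t(U{}); // the explicit conversion function is considered when
//             // forming the temporary bound to 'const T &'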
// FIXME: What if a constructor template instantiates to such a signature?
bool AllowExplicitConv = AllowExplicit && !CopyInitializing &&
Args.size() == 1 &&
hasCopyOrMoveCtorParam(S.Context, Info);
S.AddOverloadCandidate(Info.Constructor, Info.FoundDecl, Args,
CandidateSet, SuppressUserConversions,
/*PartialOverloading=*/false, AllowExplicit,
AllowExplicitConv);
}
}
// FIXME: Work around a bug in C++17 guaranteed copy elision.
//
// When initializing an object of class type T by constructor
// ([over.match.ctor]) or by list-initialization ([over.match.list])
// from a single expression of class type U, conversion functions of
// U that convert to the non-reference type cv T are candidates.
// Explicit conversion functions are only candidates during
// direct-initialization.
//
// Note: SecondStepOfCopyInit is only ever true in this case when
// evaluating whether to produce a C++98 compatibility warning.
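// For example (illustrative):
//   struct T {};
//   struct U { operator T(); };
//   T t = U(); // U::operator T() is a candidate here; in C++17 its
//              // result initializes 't' directly (no extra copy)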
if (S.getLangOpts().CPlusPlus17 && Args.size() == 1 &&
!SecondStepOfCopyInit) {
Expr *Initializer = Args[0];
auto *SourceRD = Initializer->getType()->getAsCXXRecordDecl();
if (SourceRD && S.isCompleteType(DeclLoc, Initializer->getType())) {
const auto &Conversions = SourceRD->getVisibleConversionFunctions();
for (auto I = Conversions.begin(), E = Conversions.end(); I != E; ++I) {
NamedDecl *D = *I;
CXXRecordDecl *ActingDC = cast<CXXRecordDecl>(D->getDeclContext());
D = D->getUnderlyingDecl();
FunctionTemplateDecl *ConvTemplate = dyn_cast<FunctionTemplateDecl>(D);
CXXConversionDecl *Conv;
if (ConvTemplate)
Conv = cast<CXXConversionDecl>(ConvTemplate->getTemplatedDecl());
else
Conv = cast<CXXConversionDecl>(D);
if (ConvTemplate)
S.AddTemplateConversionCandidate(
ConvTemplate, I.getPair(), ActingDC, Initializer, DestType,
CandidateSet, AllowExplicit, AllowExplicit,
/*AllowResultConversion*/ false);
else
S.AddConversionCandidate(Conv, I.getPair(), ActingDC, Initializer,
DestType, CandidateSet, AllowExplicit,
AllowExplicit,
/*AllowResultConversion*/ false);
}
}
}
// Perform overload resolution and return the result.
return CandidateSet.BestViableFunction(S, DeclLoc, Best);
}
/// Attempt initialization by constructor (C++ [dcl.init]), which
/// enumerates the constructors of the initialized entity and performs overload
/// resolution to select the best.
/// \param DestType The destination class type.
/// \param DestArrayType The destination type, which is either DestType or
/// a (possibly multidimensional) array of DestType.
/// \param IsListInit Is this list-initialization?
/// \param IsInitListCopy Is this non-list-initialization resulting from a
/// list-initialization from {x} where x is the same
/// type as the entity?
static void TryConstructorInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
MultiExprArg Args, QualType DestType,
QualType DestArrayType,
InitializationSequence &Sequence,
bool IsListInit = false,
bool IsInitListCopy = false) {
assert(((!IsListInit && !IsInitListCopy) ||
(Args.size() == 1 && isa<InitListExpr>(Args[0]))) &&
"IsListInit/IsInitListCopy must come with a single initializer list "
"argument.");
InitListExpr *ILE =
(IsListInit || IsInitListCopy) ? cast<InitListExpr>(Args[0]) : nullptr;
MultiExprArg UnwrappedArgs =
ILE ? MultiExprArg(ILE->getInits(), ILE->getNumInits()) : Args;
// The type we're constructing needs to be complete.
if (!S.isCompleteType(Kind.getLocation(), DestType)) {
Sequence.setIncompleteTypeFailure(DestType);
return;
}
// C++17 [dcl.init]p17:
// - If the initializer expression is a prvalue and the cv-unqualified
// version of the source type is the same class as the class of the
// destination, the initializer expression is used to initialize the
// destination object.
// Per DR (no number yet), this does not apply when initializing a base
// class or delegating to another constructor from a mem-initializer.
// ObjC++: Lambda captured by the block in the lambda to block conversion
// should avoid copy elision.
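// For example (illustrative):
//   struct T { T(); T(const T &) = delete; };
//   T t = T(); // OK in C++17: the prvalue 'T()' initializes 't'
//              // directly; no copy constructor is involved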
if (S.getLangOpts().CPlusPlus17 &&
Entity.getKind() != InitializedEntity::EK_Base &&
Entity.getKind() != InitializedEntity::EK_Delegating &&
Entity.getKind() !=
InitializedEntity::EK_LambdaToBlockConversionBlockElement &&
UnwrappedArgs.size() == 1 && UnwrappedArgs[0]->isPRValue() &&
S.Context.hasSameUnqualifiedType(UnwrappedArgs[0]->getType(), DestType)) {
// Convert qualifications if necessary.
Sequence.AddQualificationConversionStep(DestType, VK_PRValue);
if (ILE)
Sequence.RewrapReferenceInitList(DestType, ILE);
return;
}
const RecordType *DestRecordType = DestType->getAs<RecordType>();
assert(DestRecordType && "Constructor initialization requires record type");
CXXRecordDecl *DestRecordDecl
= cast<CXXRecordDecl>(DestRecordType->getDecl());
// Build the candidate set directly in the initialization sequence
// structure, so that it will persist if we fail.
OverloadCandidateSet &CandidateSet = Sequence.getFailedCandidateSet();
// Determine whether we are allowed to call explicit constructors or
// explicit conversion operators.
bool AllowExplicit = Kind.AllowExplicit() || IsListInit;
bool CopyInitialization = Kind.getKind() == InitializationKind::IK_Copy;
// - Otherwise, if T is a class type, constructors are considered. The
// applicable constructors are enumerated, and the best one is chosen
// through overload resolution.
DeclContext::lookup_result Ctors = S.LookupConstructors(DestRecordDecl);
OverloadingResult Result = OR_No_Viable_Function;
OverloadCandidateSet::iterator Best;
bool AsInitializerList = false;
// C++11 [over.match.list]p1, per DR1467:
// When objects of non-aggregate type T are list-initialized, such that
// 8.5.4 [dcl.init.list] specifies that overload resolution is performed
// according to the rules in this section, overload resolution selects
// the constructor in two phases:
//
// - Initially, the candidate functions are the initializer-list
// constructors of the class T and the argument list consists of the
// initializer list as a single argument.
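// For example (illustrative):
//   struct T { T(std::initializer_list<int>); T(int, int); };
//   T t{1, 2}; // phase one selects the initializer-list constructor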
if (IsListInit) {
AsInitializerList = true;
// If the initializer list has no elements and T has a default constructor,
// the first phase is omitted.
if (!(UnwrappedArgs.empty() && S.LookupDefaultConstructor(DestRecordDecl)))
Result = ResolveConstructorOverload(S, Kind.getLocation(), Args,
CandidateSet, DestType, Ctors, Best,
CopyInitialization, AllowExplicit,
/*OnlyListConstructors=*/true,
IsListInit);
}
// C++11 [over.match.list]p1:
// - If no viable initializer-list constructor is found, overload resolution
// is performed again, where the candidate functions are all the
// constructors of the class T and the argument list consists of the
// elements of the initializer list.
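// For example (illustrative):
//   struct T { T(int, int); };
//   T t{1, 2}; // no initializer-list constructor is viable, so the
//              // second phase selects T(int, int)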
if (Result == OR_No_Viable_Function) {
AsInitializerList = false;
Result = ResolveConstructorOverload(S, Kind.getLocation(), UnwrappedArgs,
CandidateSet, DestType, Ctors, Best,
CopyInitialization, AllowExplicit,
/*OnlyListConstructors=*/false,
IsListInit);
}
if (Result) {
Sequence.SetOverloadFailure(
IsListInit ? InitializationSequence::FK_ListConstructorOverloadFailed
: InitializationSequence::FK_ConstructorOverloadFailed,
Result);
if (Result != OR_Deleted)
return;
}
bool HadMultipleCandidates = (CandidateSet.size() > 1);
// In C++17, ResolveConstructorOverload can select a conversion function
// instead of a constructor.
if (auto *CD = dyn_cast<CXXConversionDecl>(Best->Function)) {
// Add the user-defined conversion step that calls the conversion function.
QualType ConvType = CD->getConversionType();
assert(S.Context.hasSameUnqualifiedType(ConvType, DestType) &&
"should not have selected this conversion function");
Sequence.AddUserConversionStep(CD, Best->FoundDecl, ConvType,
HadMultipleCandidates);
if (!S.Context.hasSameType(ConvType, DestType))
Sequence.AddQualificationConversionStep(DestType, VK_PRValue);
if (IsListInit)
Sequence.RewrapReferenceInitList(Entity.getType(), ILE);
return;
}
CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);
if (Result != OR_Deleted) {
// C++11 [dcl.init]p6:
// If a program calls for the default initialization of an object
// of a const-qualified type T, T shall be a class type with a
// user-provided default constructor.
// C++ core issue 253 proposal:
// If the implicit default constructor initializes all subobjects, no
// initializer should be required.
// The 253 proposal is for example needed to process libstdc++ headers
// in 5.x.
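// For example (illustrative):
//   struct S { int x = 0; };
//   const S s; // accepted: the implicit default constructor
//              // initializes all subobjects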
if (Kind.getKind() == InitializationKind::IK_Default &&
Entity.getType().isConstQualified()) {
if (!CtorDecl->getParent()->allowConstDefaultInit()) {
if (!maybeRecoverWithZeroInitialization(S, Sequence, Entity))
Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst);
return;
}
}
// C++11 [over.match.list]p1:
// In copy-list-initialization, if an explicit constructor is chosen, the
// initializer is ill-formed.
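// For example (illustrative):
//   struct T { explicit T(int); };
//   T t = {1}; // ill-formed: an explicit constructor was chosen
//              // during copy-list-initialization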
if (IsListInit && !Kind.AllowExplicit() && CtorDecl->isExplicit()) {
Sequence.SetFailed(InitializationSequence::FK_ExplicitConstructor);
return;
}
}
// [class.copy.elision]p3:
// In some copy-initialization contexts, a two-stage overload resolution
// is performed.
// If the first overload resolution selects a deleted function, we also
// need the initialization sequence to decide whether to perform the second
// overload resolution.
// For deleted functions in other contexts, there is no need to get the
// initialization sequence.
if (Result == OR_Deleted && Kind.getKind() != InitializationKind::IK_Copy)
return;
// Add the constructor initialization step. Any cv-qualification conversion is
// subsumed by the initialization.
Sequence.AddConstructorInitializationStep(
Best->FoundDecl, CtorDecl, DestArrayType, HadMultipleCandidates,
IsListInit | IsInitListCopy, AsInitializerList);
}
static bool
ResolveOverloadedFunctionForReferenceBinding(Sema &S,
Expr *Initializer,
QualType &SourceType,
QualType &UnqualifiedSourceType,
QualType UnqualifiedTargetType,
InitializationSequence &Sequence) {
if (S.Context.getCanonicalType(UnqualifiedSourceType) ==
S.Context.OverloadTy) {
DeclAccessPair Found;
bool HadMultipleCandidates = false;
if (FunctionDecl *Fn
= S.ResolveAddressOfOverloadedFunction(Initializer,
UnqualifiedTargetType,
false, Found,
&HadMultipleCandidates)) {
Sequence.AddAddressOverloadResolutionStep(Fn, Found,
HadMultipleCandidates);
SourceType = Fn->getType();
UnqualifiedSourceType = SourceType.getUnqualifiedType();
} else if (!UnqualifiedTargetType->isRecordType()) {
Sequence.SetFailed(InitializationSequence::FK_AddressOfOverloadFailed);
return true;
}
}
return false;
}
static void TryReferenceInitializationCore(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
Expr *Initializer,
QualType cv1T1, QualType T1,
Qualifiers T1Quals,
QualType cv2T2, QualType T2,
Qualifiers T2Quals,
InitializationSequence &Sequence);
static void TryValueInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitializationSequence &Sequence,
InitListExpr *InitList = nullptr);
/// Attempt list initialization of a reference.
static void TryReferenceListInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitListExpr *InitList,
InitializationSequence &Sequence,
bool TreatUnavailableAsInvalid) {
// First, catch C++03 where this isn't possible.
if (!S.getLangOpts().CPlusPlus11) {
Sequence.SetFailed(InitializationSequence::FK_ReferenceBindingToInitList);
return;
}
// Can't reference initialize a compound literal.
if (Entity.getKind() == InitializedEntity::EK_CompoundLiteralInit) {
Sequence.SetFailed(InitializationSequence::FK_ReferenceBindingToInitList);
return;
}
QualType DestType = Entity.getType();
QualType cv1T1 = DestType->castAs<ReferenceType>()->getPointeeType();
Qualifiers T1Quals;
QualType T1 = S.Context.getUnqualifiedArrayType(cv1T1, T1Quals);
// Reference initialization via an initializer list works thus:
// If the initializer list consists of a single element that is
// reference-related to the referenced type, bind directly to that element
// (possibly creating temporaries).
// Otherwise, initialize a temporary with the initializer list and
// bind to that.
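// For example (illustrative):
//   int i = 0;
//   int &r{i};          // single reference-related element: bind to 'i'
//   const double &d{i}; // not reference-related: a 'double' temporary is
//                       // list-initialized from {i} and bound to 'd'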
if (InitList->getNumInits() == 1) {
Expr *Initializer = InitList->getInit(0);
QualType cv2T2 = S.getCompletedType(Initializer);
Qualifiers T2Quals;
QualType T2 = S.Context.getUnqualifiedArrayType(cv2T2, T2Quals);
// If this fails, creating a temporary wouldn't work either.
if (ResolveOverloadedFunctionForReferenceBinding(S, Initializer, cv2T2, T2,
T1, Sequence))
return;
SourceLocation DeclLoc = Initializer->getBeginLoc();
Sema::ReferenceCompareResult RefRelationship
= S.CompareReferenceRelationship(DeclLoc, cv1T1, cv2T2);
if (RefRelationship >= Sema::Ref_Related) {
// Try to bind the reference here.
TryReferenceInitializationCore(S, Entity, Kind, Initializer, cv1T1, T1,
T1Quals, cv2T2, T2, T2Quals, Sequence);
if (Sequence)
Sequence.RewrapReferenceInitList(cv1T1, InitList);
return;
}
// Update the initializer if we've resolved an overloaded function.
if (Sequence.step_begin() != Sequence.step_end())
Sequence.RewrapReferenceInitList(cv1T1, InitList);
}
// Perform address space compatibility check.
QualType cv1T1IgnoreAS = cv1T1;
if (T1Quals.hasAddressSpace()) {
Qualifiers T2Quals;
(void)S.Context.getUnqualifiedArrayType(InitList->getType(), T2Quals);
if (!T1Quals.isAddressSpaceSupersetOf(T2Quals)) {
Sequence.SetFailed(
InitializationSequence::FK_ReferenceInitDropsQualifiers);
return;
}
// Ignore address space of reference type at this point and perform address
// space conversion after the reference binding step.
cv1T1IgnoreAS =
S.Context.getQualifiedType(T1, T1Quals.withoutAddressSpace());
}
// Not reference-related. Create a temporary and bind to that.
InitializedEntity TempEntity =
InitializedEntity::InitializeTemporary(cv1T1IgnoreAS);
TryListInitialization(S, TempEntity, Kind, InitList, Sequence,
TreatUnavailableAsInvalid);
if (Sequence) {
if (DestType->isRValueReferenceType() ||
(T1Quals.hasConst() && !T1Quals.hasVolatile())) {
Sequence.AddReferenceBindingStep(cv1T1IgnoreAS,
/*BindingTemporary=*/true);
if (T1Quals.hasAddressSpace())
Sequence.AddQualificationConversionStep(
cv1T1, DestType->isRValueReferenceType() ? VK_XValue : VK_LValue);
} else
Sequence.SetFailed(
InitializationSequence::FK_NonConstLValueReferenceBindingToTemporary);
}
}
/// Attempt list initialization (C++0x [dcl.init.list])
static void TryListInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitListExpr *InitList,
InitializationSequence &Sequence,
bool TreatUnavailableAsInvalid) {
QualType DestType = Entity.getType();
// C++ doesn't allow scalar initialization with more than one argument.
// But C99 complex numbers are scalars and it makes sense there.
if (S.getLangOpts().CPlusPlus && DestType->isScalarType() &&
!DestType->isAnyComplexType() && InitList->getNumInits() > 1) {
Sequence.SetFailed(InitializationSequence::FK_TooManyInitsForScalar);
return;
}
if (DestType->isReferenceType()) {
TryReferenceListInitialization(S, Entity, Kind, InitList, Sequence,
TreatUnavailableAsInvalid);
return;
}
if (DestType->isRecordType() &&
!S.isCompleteType(InitList->getBeginLoc(), DestType)) {
Sequence.setIncompleteTypeFailure(DestType);
return;
}
// C++11 [dcl.init.list]p3, per DR1467:
// - If T is a class type and the initializer list has a single element of
// type cv U, where U is T or a class derived from T, the object is
// initialized from that element (by copy-initialization for
// copy-list-initialization, or by direct-initialization for
// direct-list-initialization).
// - Otherwise, if T is a character array and the initializer list has a
// single element that is an appropriately-typed string literal
// (8.5.2 [dcl.init.string]), initialization is performed as described
// in that section.
// - Otherwise, if T is an aggregate, [...] (continue below).
if (S.getLangOpts().CPlusPlus11 && InitList->getNumInits() == 1) {
if (DestType->isRecordType()) {
QualType InitType = InitList->getInit(0)->getType();
if (S.Context.hasSameUnqualifiedType(InitType, DestType) ||
S.IsDerivedFrom(InitList->getBeginLoc(), InitType, DestType)) {
Expr *InitListAsExpr = InitList;
TryConstructorInitialization(S, Entity, Kind, InitListAsExpr, DestType,
DestType, Sequence,
/*InitListSyntax*/false,
/*IsInitListCopy*/true);
return;
}
}
if (const ArrayType *DestAT = S.Context.getAsArrayType(DestType)) {
Expr *SubInit[1] = {InitList->getInit(0)};
if (!isa<VariableArrayType>(DestAT) &&
IsStringInit(SubInit[0], DestAT, S.Context) == SIF_None) {
InitializationKind SubKind =
Kind.getKind() == InitializationKind::IK_DirectList
? InitializationKind::CreateDirect(Kind.getLocation(),
InitList->getLBraceLoc(),
InitList->getRBraceLoc())
: Kind;
Sequence.InitializeFrom(S, Entity, SubKind, SubInit,
/*TopLevelOfInitList*/ true,
TreatUnavailableAsInvalid);
// TryStringLiteralInitialization() (in InitializeFrom()) will fail if
// the element is not an appropriately-typed string literal, in which
// case we should proceed as in C++11 (below).
if (Sequence) {
Sequence.RewrapReferenceInitList(Entity.getType(), InitList);
return;
}
}
}
}
// C++11 [dcl.init.list]p3:
// - If T is an aggregate, aggregate initialization is performed.
if ((DestType->isRecordType() && !DestType->isAggregateType()) ||
(S.getLangOpts().CPlusPlus11 &&
S.isStdInitializerList(DestType, nullptr))) {
if (S.getLangOpts().CPlusPlus11) {
// - Otherwise, if the initializer list has no elements and T is a
// class type with a default constructor, the object is
// value-initialized.
if (InitList->getNumInits() == 0) {
CXXRecordDecl *RD = DestType->getAsCXXRecordDecl();
if (S.LookupDefaultConstructor(RD)) {
TryValueInitialization(S, Entity, Kind, Sequence, InitList);
return;
}
}
// - Otherwise, if T is a specialization of std::initializer_list<E>,
// an initializer_list object constructed [...]
if (TryInitializerListConstruction(S, InitList, DestType, Sequence,
TreatUnavailableAsInvalid))
return;
// - Otherwise, if T is a class type, constructors are considered.
Expr *InitListAsExpr = InitList;
TryConstructorInitialization(S, Entity, Kind, InitListAsExpr, DestType,
DestType, Sequence, /*InitListSyntax*/true);
} else
Sequence.SetFailed(InitializationSequence::FK_InitListBadDestinationType);
return;
}
if (S.getLangOpts().CPlusPlus && !DestType->isAggregateType() &&
InitList->getNumInits() == 1) {
Expr *E = InitList->getInit(0);
// - Otherwise, if T is an enumeration with a fixed underlying type,
// the initializer-list has a single element v, and the initialization
// is direct-list-initialization, the object is initialized with the
// value T(v); if a narrowing conversion is required to convert v to
// the underlying type of T, the program is ill-formed.
auto *ET = DestType->getAs<EnumType>();
if (S.getLangOpts().CPlusPlus17 &&
Kind.getKind() == InitializationKind::IK_DirectList &&
ET && ET->getDecl()->isFixed() &&
!S.Context.hasSameUnqualifiedType(E->getType(), DestType) &&
(E->getType()->isIntegralOrUnscopedEnumerationType() ||
E->getType()->isFloatingType())) {
// There are two ways that T(v) can work when T is an enumeration type.
// If there is either an implicit conversion sequence from v to T or
// a conversion function that can convert from v to T, then we use that.
// Otherwise, if v is of integral, unscoped enumeration, or floating-point
// type, it is converted to the enumeration type via its underlying type.
// There is no overlap possible between these two cases (except when the
// source value is already of the destination type), and the first
// case is handled by the general case for single-element lists below.
ImplicitConversionSequence ICS;
ICS.setStandard();
ICS.Standard.setAsIdentityConversion();
if (!E->isPRValue())
ICS.Standard.First = ICK_Lvalue_To_Rvalue;
// If E is of a floating-point type, then the conversion is ill-formed
// due to narrowing, but go through the motions in order to produce the
// right diagnostic.
ICS.Standard.Second = E->getType()->isFloatingType()
? ICK_Floating_Integral
: ICK_Integral_Conversion;
ICS.Standard.setFromType(E->getType());
ICS.Standard.setToType(0, E->getType());
ICS.Standard.setToType(1, DestType);
ICS.Standard.setToType(2, DestType);
Sequence.AddConversionSequenceStep(ICS, ICS.Standard.getToType(2),
/*TopLevelOfInitList*/true);
Sequence.RewrapReferenceInitList(Entity.getType(), InitList);
return;
}
// - Otherwise, if the initializer list has a single element of type E
// [...references are handled above...], the object or reference is
// initialized from that element (by copy-initialization for
// copy-list-initialization, or by direct-initialization for
// direct-list-initialization); if a narrowing conversion is required
// to convert the element to T, the program is ill-formed.
//
// Per core-24034, this is direct-initialization if we were performing
// direct-list-initialization and copy-initialization otherwise.
// We can't use InitListChecker for this, because it always performs
// copy-initialization. This only matters if we might use an 'explicit'
// conversion operator, or for the special case conversion of nullptr_t to
// bool, so we only need to handle those cases.
//
// FIXME: Why not do this in all cases?
Expr *Init = InitList->getInit(0);
if (Init->getType()->isRecordType() ||
(Init->getType()->isNullPtrType() && DestType->isBooleanType())) {
InitializationKind SubKind =
Kind.getKind() == InitializationKind::IK_DirectList
? InitializationKind::CreateDirect(Kind.getLocation(),
InitList->getLBraceLoc(),
InitList->getRBraceLoc())
: Kind;
Expr *SubInit[1] = { Init };
Sequence.InitializeFrom(S, Entity, SubKind, SubInit,
/*TopLevelOfInitList*/true,
TreatUnavailableAsInvalid);
if (Sequence)
Sequence.RewrapReferenceInitList(Entity.getType(), InitList);
return;
}
}
InitListChecker CheckInitList(S, Entity, InitList,
DestType, /*VerifyOnly=*/true, TreatUnavailableAsInvalid);
if (CheckInitList.HadError()) {
Sequence.SetFailed(InitializationSequence::FK_ListInitializationFailed);
return;
}
// Add the list initialization step with the built init list.
Sequence.AddListInitializationStep(DestType);
}
/// Try a reference initialization that involves calling a conversion
/// function.
static OverloadingResult TryRefInitWithConversionFunction(
Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind,
Expr *Initializer, bool AllowRValues, bool IsLValueRef,
InitializationSequence &Sequence) {
QualType DestType = Entity.getType();
QualType cv1T1 = DestType->castAs<ReferenceType>()->getPointeeType();
QualType T1 = cv1T1.getUnqualifiedType();
QualType cv2T2 = Initializer->getType();
QualType T2 = cv2T2.getUnqualifiedType();
assert(!S.CompareReferenceRelationship(Initializer->getBeginLoc(), T1, T2) &&
"Must have incompatible references when binding via conversion");
// Build the candidate set directly in the initialization sequence
// structure, so that it will persist if we fail.
OverloadCandidateSet &CandidateSet = Sequence.getFailedCandidateSet();
CandidateSet.clear(OverloadCandidateSet::CSK_InitByUserDefinedConversion);
// Determine whether we are allowed to call explicit conversion operators.
// Note that none of [over.match.copy], [over.match.conv], nor
// [over.match.ref] permit an explicit constructor to be chosen when
// initializing a reference, not even for direct-initialization.
bool AllowExplicitCtors = false;
bool AllowExplicitConvs = Kind.allowExplicitConversionFunctionsInRefBinding();
const RecordType *T1RecordType = nullptr;
if (AllowRValues && (T1RecordType = T1->getAs<RecordType>()) &&
S.isCompleteType(Kind.getLocation(), T1)) {
// The type we're converting to is a class type. Enumerate its constructors
// to see if there is a suitable conversion.
CXXRecordDecl *T1RecordDecl = cast<CXXRecordDecl>(T1RecordType->getDecl());
for (NamedDecl *D : S.LookupConstructors(T1RecordDecl)) {
auto Info = getConstructorInfo(D);
if (!Info.Constructor)
continue;
if (!Info.Constructor->isInvalidDecl() &&
Info.Constructor->isConvertingConstructor(/*AllowExplicit*/true)) {
if (Info.ConstructorTmpl)
S.AddTemplateOverloadCandidate(
Info.ConstructorTmpl, Info.FoundDecl,
/*ExplicitArgs*/ nullptr, Initializer, CandidateSet,
/*SuppressUserConversions=*/true,
/*PartialOverloading*/ false, AllowExplicitCtors);
else
S.AddOverloadCandidate(
Info.Constructor, Info.FoundDecl, Initializer, CandidateSet,
/*SuppressUserConversions=*/true,
/*PartialOverloading*/ false, AllowExplicitCtors);
}
}
}
if (T1RecordType && T1RecordType->getDecl()->isInvalidDecl())
return OR_No_Viable_Function;
const RecordType *T2RecordType = nullptr;
if ((T2RecordType = T2->getAs<RecordType>()) &&
S.isCompleteType(Kind.getLocation(), T2)) {
// The type we're converting from is a class type, enumerate its conversion
// functions.
CXXRecordDecl *T2RecordDecl = cast<CXXRecordDecl>(T2RecordType->getDecl());
const auto &Conversions = T2RecordDecl->getVisibleConversionFunctions();
for (auto I = Conversions.begin(), E = Conversions.end(); I != E; ++I) {
NamedDecl *D = *I;
CXXRecordDecl *ActingDC = cast<CXXRecordDecl>(D->getDeclContext());
if (isa<UsingShadowDecl>(D))
D = cast<UsingShadowDecl>(D)->getTargetDecl();
FunctionTemplateDecl *ConvTemplate = dyn_cast<FunctionTemplateDecl>(D);
CXXConversionDecl *Conv;
if (ConvTemplate)
Conv = cast<CXXConversionDecl>(ConvTemplate->getTemplatedDecl());
else
Conv = cast<CXXConversionDecl>(D);
// If the conversion function doesn't return a reference type,
// it can't be considered for this conversion unless we're allowed to
// consider rvalues.
// FIXME: Do we need to make sure that we only consider conversion
// candidates with reference-compatible results? That might be needed to
// break recursion.
if ((AllowRValues ||
Conv->getConversionType()->isLValueReferenceType())) {
if (ConvTemplate)
S.AddTemplateConversionCandidate(
ConvTemplate, I.getPair(), ActingDC, Initializer, DestType,
CandidateSet,
/*AllowObjCConversionOnExplicit=*/false, AllowExplicitConvs);
else
S.AddConversionCandidate(
Conv, I.getPair(), ActingDC, Initializer, DestType, CandidateSet,
/*AllowObjCConversionOnExplicit=*/false, AllowExplicitConvs);
}
}
}
if (T2RecordType && T2RecordType->getDecl()->isInvalidDecl())
return OR_No_Viable_Function;
SourceLocation DeclLoc = Initializer->getBeginLoc();
// Perform overload resolution. If it fails, return the failed result.
OverloadCandidateSet::iterator Best;
if (OverloadingResult Result
= CandidateSet.BestViableFunction(S, DeclLoc, Best))
return Result;
FunctionDecl *Function = Best->Function;
// This is the overload that will be used for this initialization step if we
// use this initialization. Mark it as referenced.
Function->setReferenced();
// Compute the returned type and value kind of the conversion.
QualType cv3T3;
if (isa<CXXConversionDecl>(Function))
cv3T3 = Function->getReturnType();
else
cv3T3 = T1;
ExprValueKind VK = VK_PRValue;
if (cv3T3->isLValueReferenceType())
VK = VK_LValue;
else if (const auto *RRef = cv3T3->getAs<RValueReferenceType>())
VK = RRef->getPointeeType()->isFunctionType() ? VK_LValue : VK_XValue;
cv3T3 = cv3T3.getNonLValueExprType(S.Context);
// Add the user-defined conversion step.
bool HadMultipleCandidates = (CandidateSet.size() > 1);
Sequence.AddUserConversionStep(Function, Best->FoundDecl, cv3T3,
HadMultipleCandidates);
// Determine whether we'll need to perform derived-to-base adjustments or
// other conversions.
Sema::ReferenceConversions RefConv;
Sema::ReferenceCompareResult NewRefRelationship =
S.CompareReferenceRelationship(DeclLoc, T1, cv3T3, &RefConv);
// Add the final conversion sequence, if necessary.
if (NewRefRelationship == Sema::Ref_Incompatible) {
assert(!isa<CXXConstructorDecl>(Function) &&
"should not have conversion after constructor");
ImplicitConversionSequence ICS;
ICS.setStandard();
ICS.Standard = Best->FinalConversion;
Sequence.AddConversionSequenceStep(ICS, ICS.Standard.getToType(2));
// Every implicit conversion results in a prvalue, except for a glvalue
// derived-to-base conversion, which we handle below.
cv3T3 = ICS.Standard.getToType(2);
VK = VK_PRValue;
}
// If the converted initializer is a prvalue, its type T4 is adjusted to
// type "cv1 T4" and the temporary materialization conversion is applied.
//
// We adjust the cv-qualifications to match the reference regardless of
// whether we have a prvalue so that the AST records the change. In this
// case, T4 is "cv3 T3".
QualType cv1T4 = S.Context.getQualifiedType(cv3T3, cv1T1.getQualifiers());
if (cv1T4.getQualifiers() != cv3T3.getQualifiers())
Sequence.AddQualificationConversionStep(cv1T4, VK);
Sequence.AddReferenceBindingStep(cv1T4, VK == VK_PRValue);
VK = IsLValueRef ? VK_LValue : VK_XValue;
if (RefConv & Sema::ReferenceConversions::DerivedToBase)
Sequence.AddDerivedToBaseCastStep(cv1T1, VK);
else if (RefConv & Sema::ReferenceConversions::ObjC)
Sequence.AddObjCObjectConversionStep(cv1T1);
else if (RefConv & Sema::ReferenceConversions::Function)
Sequence.AddFunctionReferenceConversionStep(cv1T1);
else if (RefConv & Sema::ReferenceConversions::Qualification) {
if (!S.Context.hasSameType(cv1T4, cv1T1))
Sequence.AddQualificationConversionStep(cv1T1, VK);
}
return OR_Success;
}
static void CheckCXX98CompatAccessibleCopy(Sema &S,
const InitializedEntity &Entity,
Expr *CurInitExpr);
/// Attempt reference initialization (C++0x [dcl.init.ref])
static void TryReferenceInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
Expr *Initializer,
InitializationSequence &Sequence) {
QualType DestType = Entity.getType();
QualType cv1T1 = DestType->castAs<ReferenceType>()->getPointeeType();
Qualifiers T1Quals;
QualType T1 = S.Context.getUnqualifiedArrayType(cv1T1, T1Quals);
QualType cv2T2 = S.getCompletedType(Initializer);
Qualifiers T2Quals;
QualType T2 = S.Context.getUnqualifiedArrayType(cv2T2, T2Quals);
// If the initializer is the address of an overloaded function, try
// to resolve the overloaded function. If all goes well, T2 is the
// type of the resulting function.
if (ResolveOverloadedFunctionForReferenceBinding(S, Initializer, cv2T2, T2,
T1, Sequence))
return;
// Delegate everything else to a subfunction.
TryReferenceInitializationCore(S, Entity, Kind, Initializer, cv1T1, T1,
T1Quals, cv2T2, T2, T2Quals, Sequence);
}
/// Determine whether an expression is a non-referenceable glvalue (one to
/// which a reference can never bind). Attempting to bind a reference to
/// such a glvalue will always create a temporary.
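///
/// For example (illustrative):
/// \code
///   struct S { int b : 3; } s;
///   const int &r = s.b; // cannot bind to the bit-field; a temporary
///                       // 'int' is materialized instead
/// \endcode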
static bool isNonReferenceableGLValue(Expr *E) {
return E->refersToBitField() || E->refersToVectorElement() ||
E->refersToMatrixElement();
}
/// Reference initialization without resolving overloaded functions.
///
/// We also can get here in C if we call a builtin which is declared as
/// a function with a parameter of reference type (such as __builtin_va_end()).
static void TryReferenceInitializationCore(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
Expr *Initializer,
QualType cv1T1, QualType T1,
Qualifiers T1Quals,
QualType cv2T2, QualType T2,
Qualifiers T2Quals,
InitializationSequence &Sequence) {
QualType DestType = Entity.getType();
SourceLocation DeclLoc = Initializer->getBeginLoc();
// Compute some basic properties of the types and the initializer.
bool isLValueRef = DestType->isLValueReferenceType();
bool isRValueRef = !isLValueRef;
Expr::Classification InitCategory = Initializer->Classify(S.Context);
Sema::ReferenceConversions RefConv;
Sema::ReferenceCompareResult RefRelationship =
S.CompareReferenceRelationship(DeclLoc, cv1T1, cv2T2, &RefConv);
// C++0x [dcl.init.ref]p5:
// A reference to type "cv1 T1" is initialized by an expression of type
// "cv2 T2" as follows:
//
// - If the reference is an lvalue reference and the initializer
// expression
// Note the analogous bullet points for rvalue refs to functions. Because
// there are no function rvalues in C++, rvalue refs to functions are treated
// like lvalue refs.
OverloadingResult ConvOvlResult = OR_Success;
bool T1Function = T1->isFunctionType();
if (isLValueRef || T1Function) {
if (InitCategory.isLValue() && !isNonReferenceableGLValue(Initializer) &&
(RefRelationship == Sema::Ref_Compatible ||
(Kind.isCStyleOrFunctionalCast() &&
RefRelationship == Sema::Ref_Related))) {
// - is an lvalue (but is not a bit-field), and "cv1 T1" is
// reference-compatible with "cv2 T2," or
if (RefConv & (Sema::ReferenceConversions::DerivedToBase |
Sema::ReferenceConversions::ObjC)) {
// If we're converting the pointee, add any qualifiers first;
// these qualifiers must all be top-level, so just convert to "cv1 T2".
if (RefConv & (Sema::ReferenceConversions::Qualification))
Sequence.AddQualificationConversionStep(
S.Context.getQualifiedType(T2, T1Quals),
Initializer->getValueKind());
if (RefConv & Sema::ReferenceConversions::DerivedToBase)
Sequence.AddDerivedToBaseCastStep(cv1T1, VK_LValue);
else
Sequence.AddObjCObjectConversionStep(cv1T1);
} else if (RefConv & Sema::ReferenceConversions::Qualification) {
// Perform a (possibly multi-level) qualification conversion.
Sequence.AddQualificationConversionStep(cv1T1,
Initializer->getValueKind());
} else if (RefConv & Sema::ReferenceConversions::Function) {
Sequence.AddFunctionReferenceConversionStep(cv1T1);
}
// We only create a temporary here when binding a reference to a
// bit-field or vector element. Those cases aren't supposed to be
// handled by this bullet, but the outcome is the same either way.
Sequence.AddReferenceBindingStep(cv1T1, false);
return;
}
// - has a class type (i.e., T2 is a class type), where T1 is not
// reference-related to T2, and can be implicitly converted to an
// lvalue of type "cv3 T3," where "cv1 T1" is reference-compatible
// with "cv3 T3" (this conversion is selected by enumerating the
// applicable conversion functions (13.3.1.6) and choosing the best
// one through overload resolution (13.3)),
// If we have an rvalue ref to function type here, the rhs must be
// an rvalue. DR1287 removed the "implicitly" here.
if (RefRelationship == Sema::Ref_Incompatible && T2->isRecordType() &&
(isLValueRef || InitCategory.isRValue())) {
if (S.getLangOpts().CPlusPlus) {
// Try conversion functions only for C++.
ConvOvlResult = TryRefInitWithConversionFunction(
S, Entity, Kind, Initializer, /*AllowRValues*/ isRValueRef,
/*IsLValueRef*/ isLValueRef, Sequence);
if (ConvOvlResult == OR_Success)
return;
if (ConvOvlResult != OR_No_Viable_Function)
Sequence.SetOverloadFailure(
InitializationSequence::FK_ReferenceInitOverloadFailed,
ConvOvlResult);
} else {
ConvOvlResult = OR_No_Viable_Function;
}
}
}
// - Otherwise, the reference shall be an lvalue reference to a
// non-volatile const type (i.e., cv1 shall be const), or the reference
// shall be an rvalue reference.
// For address spaces, we interpret this to mean that an addr space
// of a reference "cv1 T1" is a superset of addr space of "cv2 T2".
if (isLValueRef && !(T1Quals.hasConst() && !T1Quals.hasVolatile() &&
T1Quals.isAddressSpaceSupersetOf(T2Quals))) {
if (S.Context.getCanonicalType(T2) == S.Context.OverloadTy)
Sequence.SetFailed(InitializationSequence::FK_AddressOfOverloadFailed);
else if (ConvOvlResult && !Sequence.getFailedCandidateSet().empty())
Sequence.SetOverloadFailure(
InitializationSequence::FK_ReferenceInitOverloadFailed,
ConvOvlResult);
else if (!InitCategory.isLValue())
Sequence.SetFailed(
T1Quals.isAddressSpaceSupersetOf(T2Quals)
? InitializationSequence::
FK_NonConstLValueReferenceBindingToTemporary
: InitializationSequence::FK_ReferenceInitDropsQualifiers);
else {
InitializationSequence::FailureKind FK;
switch (RefRelationship) {
case Sema::Ref_Compatible:
if (Initializer->refersToBitField())
FK = InitializationSequence::
FK_NonConstLValueReferenceBindingToBitfield;
else if (Initializer->refersToVectorElement())
FK = InitializationSequence::
FK_NonConstLValueReferenceBindingToVectorElement;
else if (Initializer->refersToMatrixElement())
FK = InitializationSequence::
FK_NonConstLValueReferenceBindingToMatrixElement;
else
llvm_unreachable("unexpected kind of compatible initializer");
break;
case Sema::Ref_Related:
FK = InitializationSequence::FK_ReferenceInitDropsQualifiers;
break;
case Sema::Ref_Incompatible:
FK = InitializationSequence::
FK_NonConstLValueReferenceBindingToUnrelated;
break;
}
Sequence.SetFailed(FK);
}
return;
}
// - If the initializer expression
// - is an
// [<=14] xvalue (but not a bit-field), class prvalue, array prvalue, or
// [1z] rvalue (but not a bit-field) or
// function lvalue and "cv1 T1" is reference-compatible with "cv2 T2"
//
// Note: functions are handled above and below rather than here...
if (!T1Function &&
(RefRelationship == Sema::Ref_Compatible ||
(Kind.isCStyleOrFunctionalCast() &&
RefRelationship == Sema::Ref_Related)) &&
((InitCategory.isXValue() && !isNonReferenceableGLValue(Initializer)) ||
(InitCategory.isPRValue() &&
(S.getLangOpts().CPlusPlus17 || T2->isRecordType() ||
T2->isArrayType())))) {
ExprValueKind ValueKind = InitCategory.isXValue() ? VK_XValue : VK_PRValue;
if (InitCategory.isPRValue() && T2->isRecordType()) {
// The corresponding bullet in C++03 [dcl.init.ref]p5 gives the
// compiler the freedom to perform a copy here or bind to the
// object, while C++0x requires that we bind directly to the
// object. Hence, we always bind to the object without making an
// extra copy. However, C++03 requires that we check for the
// presence of a suitable copy constructor:
//
// The constructor that would be used to make the copy shall
// be callable whether or not the copy is actually done.
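// For example (illustrative):
//   struct S { S(); private: S(const S &); };
//   const S &r = S(); // ill-formed in C++03 (the copy constructor is
//                     // not callable); OK in C++11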
if (!S.getLangOpts().CPlusPlus11 && !S.getLangOpts().MicrosoftExt)
Sequence.AddExtraneousCopyToTemporary(cv2T2);
else if (S.getLangOpts().CPlusPlus11)
CheckCXX98CompatAccessibleCopy(S, Entity, Initializer);
}
// C++1z [dcl.init.ref]/5.2.1.2:
// If the converted initializer is a prvalue, its type T4 is adjusted
// to type "cv1 T4" and the temporary materialization conversion is
// applied.
// Postpone address space conversions to after the temporary materialization
// conversion to allow creating temporaries in the alloca address space.
auto T1QualsIgnoreAS = T1Quals;
auto T2QualsIgnoreAS = T2Quals;
if (T1Quals.getAddressSpace() != T2Quals.getAddressSpace()) {
T1QualsIgnoreAS.removeAddressSpace();
T2QualsIgnoreAS.removeAddressSpace();
}
QualType cv1T4 = S.Context.getQualifiedType(cv2T2, T1QualsIgnoreAS);
if (T1QualsIgnoreAS != T2QualsIgnoreAS)
Sequence.AddQualificationConversionStep(cv1T4, ValueKind);
Sequence.AddReferenceBindingStep(cv1T4, ValueKind == VK_PRValue);
ValueKind = isLValueRef ? VK_LValue : VK_XValue;
// Add addr space conversion if required.
if (T1Quals.getAddressSpace() != T2Quals.getAddressSpace()) {
auto T4Quals = cv1T4.getQualifiers();
T4Quals.addAddressSpace(T1Quals.getAddressSpace());
QualType cv1T4WithAS = S.Context.getQualifiedType(T2, T4Quals);
Sequence.AddQualificationConversionStep(cv1T4WithAS, ValueKind);
cv1T4 = cv1T4WithAS;
}
// In any case, the reference is bound to the resulting glvalue (or to
// an appropriate base class subobject).
if (RefConv & Sema::ReferenceConversions::DerivedToBase)
Sequence.AddDerivedToBaseCastStep(cv1T1, ValueKind);
else if (RefConv & Sema::ReferenceConversions::ObjC)
Sequence.AddObjCObjectConversionStep(cv1T1);
else if (RefConv & Sema::ReferenceConversions::Qualification) {
if (!S.Context.hasSameType(cv1T4, cv1T1))
Sequence.AddQualificationConversionStep(cv1T1, ValueKind);
}
return;
}
// - has a class type (i.e., T2 is a class type), where T1 is not
// reference-related to T2, and can be implicitly converted to an
// xvalue, class prvalue, or function lvalue of type "cv3 T3",
// where "cv1 T1" is reference-compatible with "cv3 T3",
//
// DR1287 removes the "implicitly" here.
if (T2->isRecordType()) {
if (RefRelationship == Sema::Ref_Incompatible) {
ConvOvlResult = TryRefInitWithConversionFunction(
S, Entity, Kind, Initializer, /*AllowRValues*/ true,
/*IsLValueRef*/ isLValueRef, Sequence);
if (ConvOvlResult)
Sequence.SetOverloadFailure(
InitializationSequence::FK_ReferenceInitOverloadFailed,
ConvOvlResult);
return;
}
if (RefRelationship == Sema::Ref_Compatible &&
isRValueRef && InitCategory.isLValue()) {
Sequence.SetFailed(
InitializationSequence::FK_RValueReferenceBindingToLValue);
return;
}
Sequence.SetFailed(InitializationSequence::FK_ReferenceInitDropsQualifiers);
return;
}
// - Otherwise, a temporary of type "cv1 T1" is created and initialized
// from the initializer expression using the rules for a non-reference
// copy-initialization (8.5). The reference is then bound to the
// temporary. [...]
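// For example (illustrative):
//   const int &r = 3.14; // a temporary 'const int' is copy-initialized
//                        // from 3.14 and the reference binds to it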
// Ignore address space of reference type at this point and perform address
// space conversion after the reference binding step.
QualType cv1T1IgnoreAS =
T1Quals.hasAddressSpace()
? S.Context.getQualifiedType(T1, T1Quals.withoutAddressSpace())
: cv1T1;
InitializedEntity TempEntity =
InitializedEntity::InitializeTemporary(cv1T1IgnoreAS);
// FIXME: Why do we use an implicit conversion here rather than trying
// copy-initialization?
ImplicitConversionSequence ICS
= S.TryImplicitConversion(Initializer, TempEntity.getType(),
/*SuppressUserConversions=*/false,
Sema::AllowedExplicit::None,
/*FIXME:InOverloadResolution=*/false,
/*CStyle=*/Kind.isCStyleOrFunctionalCast(),
/*AllowObjCWritebackConversion=*/false);
if (ICS.isBad()) {
// FIXME: Use the conversion function set stored in ICS to turn
// this into an overloading ambiguity diagnostic. However, we need
// to keep that set as an OverloadCandidateSet rather than as some
// other kind of set.
if (ConvOvlResult && !Sequence.getFailedCandidateSet().empty())
Sequence.SetOverloadFailure(
InitializationSequence::FK_ReferenceInitOverloadFailed,
ConvOvlResult);
else if (S.Context.getCanonicalType(T2) == S.Context.OverloadTy)
Sequence.SetFailed(InitializationSequence::FK_AddressOfOverloadFailed);
else
Sequence.SetFailed(InitializationSequence::FK_ReferenceInitFailed);
return;
} else {
Sequence.AddConversionSequenceStep(ICS, TempEntity.getType());
}
// [...] If T1 is reference-related to T2, cv1 must be the
// same cv-qualification as, or greater cv-qualification
// than, cv2; otherwise, the program is ill-formed.
unsigned T1CVRQuals = T1Quals.getCVRQualifiers();
unsigned T2CVRQuals = T2Quals.getCVRQualifiers();
if (RefRelationship == Sema::Ref_Related &&
((T1CVRQuals | T2CVRQuals) != T1CVRQuals ||
!T1Quals.isAddressSpaceSupersetOf(T2Quals))) {
Sequence.SetFailed(InitializationSequence::FK_ReferenceInitDropsQualifiers);
return;
}
// [...] If T1 is reference-related to T2 and the reference is an rvalue
// reference, the initializer expression shall not be an lvalue.
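// For example (illustrative):
//   int i = 0;
//   int &&rr = i; // error: an rvalue reference cannot bind to an
//                 // lvalue of reference-related type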
if (RefRelationship >= Sema::Ref_Related && !isLValueRef &&
InitCategory.isLValue()) {
Sequence.SetFailed(
InitializationSequence::FK_RValueReferenceBindingToLValue);
return;
}
Sequence.AddReferenceBindingStep(cv1T1IgnoreAS, /*BindingTemporary=*/true);
if (T1Quals.hasAddressSpace()) {
if (!Qualifiers::isAddressSpaceSupersetOf(T1Quals.getAddressSpace(),
LangAS::Default)) {
Sequence.SetFailed(
InitializationSequence::FK_ReferenceAddrspaceMismatchTemporary);
return;
}
Sequence.AddQualificationConversionStep(cv1T1, isLValueRef ? VK_LValue
: VK_XValue);
}
}
/// Attempt character array initialization from a string literal
/// (C++ [dcl.init.string], C99 6.7.8).
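///
/// For example (illustrative):
/// \code
///   char buf[6] = "hello"; // the array is initialized from the literal
///   wchar_t w[] = L"wide"; // wide and other typed literals work likewise
/// \endcode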
static void TryStringLiteralInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
Expr *Initializer,
InitializationSequence &Sequence) {
Sequence.AddStringInitStep(Entity.getType());
}
/// Attempt value initialization (C++ [dcl.init]p7).
static void TryValueInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitializationSequence &Sequence,
InitListExpr *InitList) {
assert((!InitList || InitList->getNumInits() == 0) &&
"Shouldn't use value-init for non-empty init lists");
// C++98 [dcl.init]p5, C++11 [dcl.init]p7:
//
// To value-initialize an object of type T means:
QualType T = Entity.getType();
// -- if T is an array type, then each element is value-initialized;
T = S.Context.getBaseElementType(T);
if (const RecordType *RT = T->getAs<RecordType>()) {
if (CXXRecordDecl *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
bool NeedZeroInitialization = true;
// C++98:
// -- if T is a class type (clause 9) with a user-declared constructor
// (12.1), then the default constructor for T is called (and the
// initialization is ill-formed if T has no accessible default
// constructor);
// C++11:
// -- if T is a class type (clause 9) with either no default constructor
// (12.1 [class.ctor]) or a default constructor that is user-provided
// or deleted, then the object is default-initialized;
//
// Note that the C++11 rule is the same as the C++98 rule if there are no
// defaulted or deleted constructors, so we just use it unconditionally.
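// For example (illustrative):
//   struct A { A() {} int x; };         // user-provided ctor:
//                                       // A().x is indeterminate
//   struct B { B() = default; int x; }; // not user-provided:
//                                       // B().x is zero-initialized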
CXXConstructorDecl *CD = S.LookupDefaultConstructor(ClassDecl);
if (!CD || !CD->getCanonicalDecl()->isDefaulted() || CD->isDeleted())
NeedZeroInitialization = false;
// -- if T is a (possibly cv-qualified) non-union class type without a
// user-provided or deleted default constructor, then the object is
// zero-initialized and, if T has a non-trivial default constructor,
// default-initialized;
// The 'non-union' here was removed by DR1502. The 'non-trivial default
// constructor' part was removed by DR1507.
if (NeedZeroInitialization)
Sequence.AddZeroInitializationStep(Entity.getType());
// C++03:
// -- if T is a non-union class type without a user-declared constructor,
// then every non-static data member and base class component of T is
// value-initialized;
// [...] A program that calls for [...] value-initialization of an
// entity of reference type is ill-formed.
//
// C++11 doesn't need this handling, because value-initialization does not
// occur recursively there, and the implicit default constructor is
// defined as deleted in the problematic cases.
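// For example (illustrative):
//   struct R { int &r; };
//   R r = R(); // ill-formed in C++03; in C++11 the implicit default
//              // constructor is deleted instead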
if (!S.getLangOpts().CPlusPlus11 &&
ClassDecl->hasUninitializedReferenceMember()) {
Sequence.SetFailed(InitializationSequence::FK_TooManyInitsForReference);
return;
}
// If this is list-value-initialization, pass the empty init list on when
// building the constructor call. This affects the semantics of a few
// things (such as whether an explicit default constructor can be called).
Expr *InitListAsExpr = InitList;
MultiExprArg Args(&InitListAsExpr, InitList ? 1 : 0);
bool InitListSyntax = InitList;
// FIXME: Instead of creating a CXXConstructExpr of array type here,
// wrap a class-typed CXXConstructExpr in an ArrayInitLoopExpr.
return TryConstructorInitialization(
S, Entity, Kind, Args, T, Entity.getType(), Sequence, InitListSyntax);
}
}
Sequence.AddZeroInitializationStep(Entity.getType());
}
/// Attempt default initialization (C++ [dcl.init]p6).
static void TryDefaultInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitializationSequence &Sequence) {
assert(Kind.getKind() == InitializationKind::IK_Default);
// C++ [dcl.init]p6:
// To default-initialize an object of type T means:
// - if T is an array type, each element is default-initialized;
QualType DestType = S.Context.getBaseElementType(Entity.getType());
// - if T is a (possibly cv-qualified) class type (Clause 9), the default
// constructor for T is called (and the initialization is ill-formed if
// T has no accessible default constructor);
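// For example (illustrative):
//   struct S { S(); };
//   S s; // default-initialization calls S::S()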
if (DestType->isRecordType() && S.getLangOpts().CPlusPlus) {
TryConstructorInitialization(S, Entity, Kind, std::nullopt, DestType,
Entity.getType(), Sequence);
return;
}
// - otherwise, no initialization is performed.
// If a program calls for the default initialization of an object of
// a const-qualified type T, T shall be a class type with a user-provided
// default constructor.
if (DestType.isConstQualified() && S.getLangOpts().CPlusPlus) {
if (!maybeRecoverWithZeroInitialization(S, Sequence, Entity))
Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst);
return;
}
// If the destination type has a lifetime property, zero-initialize it.
if (DestType.getQualifiers().hasObjCLifetime()) {
Sequence.AddZeroInitializationStep(Entity.getType());
return;
}
}
static void TryOrBuildParenListInitialization(
Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind,
ArrayRef<Expr *> Args, InitializationSequence &Sequence, bool VerifyOnly,
ExprResult *Result = nullptr) {
- unsigned ArgIndexToProcess = 0;
+ unsigned EntityIndexToProcess = 0;
SmallVector<Expr *, 4> InitExprs;
QualType ResultType;
Expr *ArrayFiller = nullptr;
FieldDecl *InitializedFieldInUnion = nullptr;
- // Process entities (i.e. array members, base classes, or class fields) by
- // adding an initialization expression to InitExprs for each entity to
- // initialize.
- auto ProcessEntities = [&](auto Range) -> bool {
- bool IsUnionType = Entity.getType()->isUnionType();
- for (InitializedEntity SubEntity : Range) {
- // Unions should only have one initializer expression.
- // If there are more initializers than it will be caught when we check
- // whether Index equals Args.size().
- if (ArgIndexToProcess == 1 && IsUnionType)
- return true;
-
- bool IsMember = SubEntity.getKind() == InitializedEntity::EK_Member;
-
- // Unnamed bitfields should not be initialized at all, either with an arg
- // or by default.
- if (IsMember && cast<FieldDecl>(SubEntity.getDecl())->isUnnamedBitfield())
- continue;
-
- if (ArgIndexToProcess < Args.size()) {
- // There are still expressions in Args that haven't been processed.
- // Let's match them to the current entity to initialize.
- Expr *E = Args[ArgIndexToProcess++];
-
- // Incomplete array types indicate flexible array members. Do not allow
- // paren list initializations of structs with these members, as GCC
- // doesn't either.
- if (IsMember) {
- auto *FD = cast<FieldDecl>(SubEntity.getDecl());
- if (FD->getType()->isIncompleteArrayType()) {
- if (!VerifyOnly) {
- S.Diag(E->getBeginLoc(), diag::err_flexible_array_init)
- << SourceRange(E->getBeginLoc(), E->getEndLoc());
- S.Diag(FD->getLocation(), diag::note_flexible_array_member) << FD;
- }
- Sequence.SetFailed(
- InitializationSequence::FK_ParenthesizedListInitFailed);
- return false;
- }
- }
-
- InitializationKind SubKind = InitializationKind::CreateForInit(
- E->getExprLoc(), /*isDirectInit=*/false, E);
- InitializationSequence SubSeq(S, SubEntity, SubKind, E);
-
- if (SubSeq.Failed()) {
- if (!VerifyOnly)
- SubSeq.Diagnose(S, SubEntity, SubKind, E);
- else
- Sequence.SetFailed(
- InitializationSequence::FK_ParenthesizedListInitFailed);
+ auto HandleInitializedEntity = [&](const InitializedEntity &SubEntity,
+ const InitializationKind &SubKind,
+ Expr *Arg, Expr **InitExpr = nullptr) {
+ InitializationSequence IS = [&]() {
+ if (Arg)
+ return InitializationSequence(S, SubEntity, SubKind, Arg);
+ return InitializationSequence(S, SubEntity, SubKind, std::nullopt);
+ }();
- return false;
- }
- if (!VerifyOnly) {
- ExprResult ER = SubSeq.Perform(S, SubEntity, SubKind, E);
- InitExprs.push_back(ER.get());
- if (IsMember && IsUnionType)
- InitializedFieldInUnion = cast<FieldDecl>(SubEntity.getDecl());
- }
+ if (IS.Failed()) {
+ if (!VerifyOnly) {
+ if (Arg)
+ IS.Diagnose(S, SubEntity, SubKind, Arg);
+ else
+ IS.Diagnose(S, SubEntity, SubKind, std::nullopt);
} else {
- // We've processed all of the args, but there are still entities that
- // have to be initialized.
- if (IsMember) {
- // C++ [dcl.init]p17.6.2.2
- // The remaining elements are initialized with their default member
- // initializers, if any
- auto *FD = cast<FieldDecl>(SubEntity.getDecl());
- if (FD->hasInClassInitializer()) {
- if (!VerifyOnly) {
- ExprResult DIE = S.BuildCXXDefaultInitExpr(FD->getLocation(), FD);
- if (DIE.isInvalid())
- return false;
- S.checkInitializerLifetime(SubEntity, DIE.get());
- InitExprs.push_back(DIE.get());
- }
- continue;
- }
- }
- // Remaining class elements without default member initializers and
- // array elements are value initialized:
- //
- // C++ [dcl.init]p17.6.2.2
- // The remaining elements...otherwise are value initialzed
- //
- // C++ [dcl.init]p17.5
- // if the destination type is an array, the object is initialized as
- // . follows. Let x1, . . . , xk be the elements of the expression-list
- // ...Let n denote the array size...the ith array element is...value-
- // initialized for each k < i <= n.
- InitializationKind SubKind = InitializationKind::CreateValue(
- Kind.getLocation(), Kind.getLocation(), Kind.getLocation(), true);
- InitializationSequence SubSeq(S, SubEntity, SubKind, std::nullopt);
- if (SubSeq.Failed()) {
- if (!VerifyOnly)
- SubSeq.Diagnose(S, SubEntity, SubKind, std::nullopt);
- return false;
- }
- if (!VerifyOnly) {
- ExprResult ER = SubSeq.Perform(S, SubEntity, SubKind, std::nullopt);
- if (SubEntity.getKind() == InitializedEntity::EK_ArrayElement) {
- ArrayFiller = ER.get();
- return true;
- }
- InitExprs.push_back(ER.get());
- }
+ Sequence.SetFailed(
+ InitializationSequence::FK_ParenthesizedListInitFailed);
}
+
+ return false;
+ }
+ if (!VerifyOnly) {
+ ExprResult ER;
+ if (Arg)
+ ER = IS.Perform(S, SubEntity, SubKind, Arg);
+ else
+ ER = IS.Perform(S, SubEntity, SubKind, std::nullopt);
+ if (InitExpr)
+ *InitExpr = ER.get();
+ else
+ InitExprs.push_back(ER.get());
}
return true;
};
if (const ArrayType *AT =
S.getASTContext().getAsArrayType(Entity.getType())) {
-
SmallVector<InitializedEntity, 4> ElementEntities;
uint64_t ArrayLength;
- // C++ [dcl.init]p17.5
+ // C++ [dcl.init]p16.5
// if the destination type is an array, the object is initialized as
// follows. Let x1, . . . , xk be the elements of the expression-list. If
- // the destination type is an array of unknown bound, it is define as
+ // the destination type is an array of unknown bound, it is defined as
// having k elements.
if (const ConstantArrayType *CAT =
- S.getASTContext().getAsConstantArrayType(Entity.getType()))
+ S.getASTContext().getAsConstantArrayType(Entity.getType())) {
ArrayLength = CAT->getSize().getZExtValue();
- else
+ ResultType = Entity.getType();
+ } else if (const VariableArrayType *VAT =
+ S.getASTContext().getAsVariableArrayType(Entity.getType())) {
+ // Braced-initialization of variable array types is not allowed, even if
+ // the size is greater than or equal to the number of args, so we don't
+ // allow them to be initialized via parenthesized aggregate initialization
+ // either.
+ const Expr *SE = VAT->getSizeExpr();
+ S.Diag(SE->getBeginLoc(), diag::err_variable_object_no_init)
+ << SE->getSourceRange();
+ return;
+ } else {
+ assert(isa<IncompleteArrayType>(Entity.getType()));
ArrayLength = Args.size();
+ }
+ EntityIndexToProcess = ArrayLength;
- if (ArrayLength >= Args.size()) {
- for (uint64_t I = 0; I < ArrayLength; ++I)
- ElementEntities.push_back(
- InitializedEntity::InitializeElement(S.getASTContext(), I, Entity));
-
- if (!ProcessEntities(ElementEntities))
+ // ...the ith array element is copy-initialized with xi for each
+ // 1 <= i <= k
+ for (Expr *E : Args) {
+ InitializedEntity SubEntity = InitializedEntity::InitializeElement(
+ S.getASTContext(), EntityIndexToProcess, Entity);
+ InitializationKind SubKind = InitializationKind::CreateForInit(
+ E->getExprLoc(), /*isDirectInit=*/false, E);
+ if (!HandleInitializedEntity(SubEntity, SubKind, E))
+ return;
+ }
+ // ...and value-initialized for each k < i <= n;
+ if (ArrayLength > Args.size()) {
+ InitializedEntity SubEntity = InitializedEntity::InitializeElement(
+ S.getASTContext(), Args.size(), Entity);
+ InitializationKind SubKind = InitializationKind::CreateValue(
+ Kind.getLocation(), Kind.getLocation(), Kind.getLocation(), true);
+ if (!HandleInitializedEntity(SubEntity, SubKind, nullptr, &ArrayFiller))
return;
+ }
+ if (ResultType.isNull()) {
ResultType = S.Context.getConstantArrayType(
AT->getElementType(), llvm::APInt(/*numBits=*/32, ArrayLength),
- nullptr, ArrayType::Normal, 0);
+ /*SizeExpr=*/nullptr, ArrayType::Normal, 0);
}
} else if (auto *RT = Entity.getType()->getAs<RecordType>()) {
+ bool IsUnion = RT->isUnionType();
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
- auto BaseRange = map_range(RD->bases(), [&](auto &base) {
- return InitializedEntity::InitializeBase(S.getASTContext(), &base, false,
- &Entity);
- });
- auto FieldRange = map_range(RD->fields(), [](auto *field) {
- return InitializedEntity::InitializeMember(field);
- });
+ if (!IsUnion) {
+ for (const CXXBaseSpecifier &Base : RD->bases()) {
+ InitializedEntity SubEntity = InitializedEntity::InitializeBase(
+ S.getASTContext(), &Base, false, &Entity);
+ if (EntityIndexToProcess < Args.size()) {
+ // C++ [dcl.init]p16.6.2.2.
+ // ...the object is initialized as follows. Let e1, ..., en be the
+ // elements of the aggregate([dcl.init.aggr]). Let x1, ..., xk be
+ // the elements of the expression-list...The element ei is
+ // copy-initialized with xi for 1 <= i <= k.
+ Expr *E = Args[EntityIndexToProcess];
+ InitializationKind SubKind = InitializationKind::CreateForInit(
+ E->getExprLoc(), /*isDirectInit=*/false, E);
+ if (!HandleInitializedEntity(SubEntity, SubKind, E))
+ return;
+ } else {
+ // We've processed all of the args, but there are still base classes
+ // that have to be initialized.
+ // C++ [dcl.init]p17.6.2.2
+ // The remaining elements...otherwise are value initialized
+ InitializationKind SubKind = InitializationKind::CreateValue(
+ Kind.getLocation(), Kind.getLocation(), Kind.getLocation(),
+ /*IsImplicit=*/true);
+ if (!HandleInitializedEntity(SubEntity, SubKind, nullptr))
+ return;
+ }
+ EntityIndexToProcess++;
+ }
+ }
- if (!ProcessEntities(BaseRange))
- return;
+ for (FieldDecl *FD : RD->fields()) {
+ // Unnamed bitfields should not be initialized at all, either with an arg
+ // or by default.
+ if (FD->isUnnamedBitfield())
+ continue;
- if (!ProcessEntities(FieldRange))
- return;
+ InitializedEntity SubEntity =
+ InitializedEntity::InitializeMemberFromParenAggInit(FD);
+ if (EntityIndexToProcess < Args.size()) {
+ // ...The element ei is copy-initialized with xi for 1 <= i <= k.
+ Expr *E = Args[EntityIndexToProcess];
+
+ // Incomplete array types indicate flexible array members. Do not allow
+ // paren list initializations of structs with these members, as GCC
+ // doesn't either.
+ if (FD->getType()->isIncompleteArrayType()) {
+ if (!VerifyOnly) {
+ S.Diag(E->getBeginLoc(), diag::err_flexible_array_init)
+ << SourceRange(E->getBeginLoc(), E->getEndLoc());
+ S.Diag(FD->getLocation(), diag::note_flexible_array_member) << FD;
+ }
+ Sequence.SetFailed(
+ InitializationSequence::FK_ParenthesizedListInitFailed);
+ return;
+ }
+
+ InitializationKind SubKind = InitializationKind::CreateForInit(
+ E->getExprLoc(), /*isDirectInit=*/false, E);
+ if (!HandleInitializedEntity(SubEntity, SubKind, E))
+ return;
+
+ // Unions should have only one initializer expression, so we bail out
+ // after processing the first field. If there are more initializers, they
+ // will be caught when we later check whether EntityIndexToProcess is
+ // less than Args.size().
+ if (IsUnion) {
+ InitializedFieldInUnion = FD;
+ EntityIndexToProcess = 1;
+ break;
+ }
+ } else {
+ // We've processed all of the args, but there are still members that
+ // have to be initialized.
+ if (FD->hasInClassInitializer()) {
+ if (!VerifyOnly) {
+ // C++ [dcl.init]p16.6.2.2
+ // The remaining elements are initialized with their default
+ // member initializers, if any
+ ExprResult DIE = S.BuildCXXDefaultInitExpr(FD->getLocation(), FD);
+ if (DIE.isInvalid())
+ return;
+ S.checkInitializerLifetime(SubEntity, DIE.get());
+ InitExprs.push_back(DIE.get());
+ }
+ } else {
+ // C++ [dcl.init]p17.6.2.2
+ // The remaining elements...otherwise are value initialized
+ if (FD->getType()->isReferenceType()) {
+ Sequence.SetFailed(
+ InitializationSequence::FK_ParenthesizedListInitFailed);
+ if (!VerifyOnly) {
+ SourceRange SR = Kind.getParenOrBraceRange();
+ S.Diag(SR.getEnd(), diag::err_init_reference_member_uninitialized)
+ << FD->getType() << SR;
+ S.Diag(FD->getLocation(), diag::note_uninit_reference_member);
+ }
+ return;
+ }
+ InitializationKind SubKind = InitializationKind::CreateValue(
+ Kind.getLocation(), Kind.getLocation(), Kind.getLocation(), true);
+ if (!HandleInitializedEntity(SubEntity, SubKind, nullptr))
+ return;
+ }
+ }
+ EntityIndexToProcess++;
+ }
ResultType = Entity.getType();
}
// Not all of the args have been processed, so there must've been more args
- // then were required to initialize the element.
- if (ArgIndexToProcess < Args.size()) {
+ // than were required to initialize the element.
+ if (EntityIndexToProcess < Args.size()) {
Sequence.SetFailed(InitializationSequence::FK_ParenthesizedListInitFailed);
if (!VerifyOnly) {
QualType T = Entity.getType();
int InitKind = T->isArrayType() ? 0 : T->isUnionType() ? 3 : 4;
- SourceRange ExcessInitSR(Args[ArgIndexToProcess]->getBeginLoc(),
+ SourceRange ExcessInitSR(Args[EntityIndexToProcess]->getBeginLoc(),
Args.back()->getEndLoc());
S.Diag(Kind.getLocation(), diag::err_excess_initializers)
<< InitKind << ExcessInitSR;
}
return;
}
if (VerifyOnly) {
Sequence.setSequenceKind(InitializationSequence::NormalSequence);
Sequence.AddParenthesizedListInitStep(Entity.getType());
} else if (Result) {
SourceRange SR = Kind.getParenOrBraceRange();
auto *CPLIE = CXXParenListInitExpr::Create(
S.getASTContext(), InitExprs, ResultType, Args.size(),
Kind.getLocation(), SR.getBegin(), SR.getEnd());
if (ArrayFiller)
CPLIE->setArrayFiller(ArrayFiller);
if (InitializedFieldInUnion)
CPLIE->setInitializedFieldInUnion(InitializedFieldInUnion);
*Result = CPLIE;
S.Diag(Kind.getLocation(),
diag::warn_cxx17_compat_aggregate_init_paren_list)
<< Kind.getLocation() << SR << ResultType;
}
return;
}
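// Illustrative sketch (arbitrary example names): the C++20 parenthesized
// aggregate initialization handled above accepts, e.g.:
//
//   struct A { int i; int j = 7; };
//   A a(1);            // a.i copy-initialized from 1; a.j from its default
//                      // member initializer
//   int arr[4](1, 2);  // arr[0] = 1, arr[1] = 2; the remaining elements are
//                      // value-initialized
//
// Excess arguments are diagnosed above via err_excess_initializers.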
/// Attempt a user-defined conversion between two types (C++ [dcl.init]),
/// which enumerates all conversion functions and performs overload resolution
/// to select the best.
static void TryUserDefinedConversion(Sema &S,
QualType DestType,
const InitializationKind &Kind,
Expr *Initializer,
InitializationSequence &Sequence,
bool TopLevelOfInitList) {
assert(!DestType->isReferenceType() && "References are handled elsewhere");
QualType SourceType = Initializer->getType();
assert((DestType->isRecordType() || SourceType->isRecordType()) &&
"Must have a class type to perform a user-defined conversion");
// Build the candidate set directly in the initialization sequence
// structure, so that it will persist if we fail.
OverloadCandidateSet &CandidateSet = Sequence.getFailedCandidateSet();
CandidateSet.clear(OverloadCandidateSet::CSK_InitByUserDefinedConversion);
CandidateSet.setDestAS(DestType.getQualifiers().getAddressSpace());
// Determine whether we are allowed to call explicit constructors or
// explicit conversion operators.
bool AllowExplicit = Kind.AllowExplicit();
if (const RecordType *DestRecordType = DestType->getAs<RecordType>()) {
// The type we're converting to is a class type. Enumerate its constructors
// to see if there is a suitable conversion.
CXXRecordDecl *DestRecordDecl
= cast<CXXRecordDecl>(DestRecordType->getDecl());
// Try to complete the type we're converting to.
if (S.isCompleteType(Kind.getLocation(), DestType)) {
for (NamedDecl *D : S.LookupConstructors(DestRecordDecl)) {
auto Info = getConstructorInfo(D);
if (!Info.Constructor)
continue;
if (!Info.Constructor->isInvalidDecl() &&
Info.Constructor->isConvertingConstructor(/*AllowExplicit*/true)) {
if (Info.ConstructorTmpl)
S.AddTemplateOverloadCandidate(
Info.ConstructorTmpl, Info.FoundDecl,
/*ExplicitArgs*/ nullptr, Initializer, CandidateSet,
/*SuppressUserConversions=*/true,
/*PartialOverloading*/ false, AllowExplicit);
else
S.AddOverloadCandidate(Info.Constructor, Info.FoundDecl,
Initializer, CandidateSet,
/*SuppressUserConversions=*/true,
/*PartialOverloading*/ false, AllowExplicit);
}
}
}
}
SourceLocation DeclLoc = Initializer->getBeginLoc();
if (const RecordType *SourceRecordType = SourceType->getAs<RecordType>()) {
// The type we're converting from is a class type, enumerate its conversion
// functions.
// We can only enumerate the conversion functions for a complete type; if
// the type isn't complete, simply skip this step.
if (S.isCompleteType(DeclLoc, SourceType)) {
CXXRecordDecl *SourceRecordDecl
= cast<CXXRecordDecl>(SourceRecordType->getDecl());
const auto &Conversions =
SourceRecordDecl->getVisibleConversionFunctions();
for (auto I = Conversions.begin(), E = Conversions.end(); I != E; ++I) {
NamedDecl *D = *I;
CXXRecordDecl *ActingDC = cast<CXXRecordDecl>(D->getDeclContext());
if (isa<UsingShadowDecl>(D))
D = cast<UsingShadowDecl>(D)->getTargetDecl();
FunctionTemplateDecl *ConvTemplate = dyn_cast<FunctionTemplateDecl>(D);
CXXConversionDecl *Conv;
if (ConvTemplate)
Conv = cast<CXXConversionDecl>(ConvTemplate->getTemplatedDecl());
else
Conv = cast<CXXConversionDecl>(D);
if (ConvTemplate)
S.AddTemplateConversionCandidate(
ConvTemplate, I.getPair(), ActingDC, Initializer, DestType,
CandidateSet, AllowExplicit, AllowExplicit);
else
S.AddConversionCandidate(Conv, I.getPair(), ActingDC, Initializer,
DestType, CandidateSet, AllowExplicit,
AllowExplicit);
}
}
}
// Perform overload resolution. If it fails, return the failed result.
OverloadCandidateSet::iterator Best;
if (OverloadingResult Result
= CandidateSet.BestViableFunction(S, DeclLoc, Best)) {
Sequence.SetOverloadFailure(
InitializationSequence::FK_UserConversionOverloadFailed, Result);
// [class.copy.elision]p3:
// In some copy-initialization contexts, a two-stage overload resolution
// is performed.
// If the first overload resolution selects a deleted function, we also
// need the initialization sequence to decide whether to perform the second
// overload resolution.
if (!(Result == OR_Deleted &&
Kind.getKind() == InitializationKind::IK_Copy))
return;
}
FunctionDecl *Function = Best->Function;
Function->setReferenced();
bool HadMultipleCandidates = (CandidateSet.size() > 1);
if (isa<CXXConstructorDecl>(Function)) {
// Add the user-defined conversion step. Any cv-qualification conversion is
// subsumed by the initialization. Per DR5, the created temporary is of the
// cv-unqualified type of the destination.
Sequence.AddUserConversionStep(Function, Best->FoundDecl,
DestType.getUnqualifiedType(),
HadMultipleCandidates);
// C++14 and before:
// - if the function is a constructor, the call initializes a temporary
// of the cv-unqualified version of the destination type. The [...]
// temporary [...] is then used to direct-initialize, according to the
// rules above, the object that is the destination of the
// copy-initialization.
// Note that this just performs a simple object copy from the temporary.
//
// C++17:
// - if the function is a constructor, the call is a prvalue of the
// cv-unqualified version of the destination type whose return object
// is initialized by the constructor. The call is used to
// direct-initialize, according to the rules above, the object that
// is the destination of the copy-initialization.
// Therefore we need to do nothing further.
//
// FIXME: Mark this copy as extraneous.
if (!S.getLangOpts().CPlusPlus17)
Sequence.AddFinalCopy(DestType);
else if (DestType.hasQualifiers())
Sequence.AddQualificationConversionStep(DestType, VK_PRValue);
return;
}
// Add the user-defined conversion step that calls the conversion function.
QualType ConvType = Function->getCallResultType();
Sequence.AddUserConversionStep(Function, Best->FoundDecl, ConvType,
HadMultipleCandidates);
if (ConvType->getAs<RecordType>()) {
// The call is used to direct-initialize [...] the object that is the
// destination of the copy-initialization.
//
// In C++17, this does not call a constructor if we enter /17.6.1:
// - If the initializer expression is a prvalue and the cv-unqualified
// version of the source type is the same as the class of the
// destination [... do not make an extra copy]
//
// FIXME: Mark this copy as extraneous.
if (!S.getLangOpts().CPlusPlus17 ||
Function->getReturnType()->isReferenceType() ||
!S.Context.hasSameUnqualifiedType(ConvType, DestType))
Sequence.AddFinalCopy(DestType);
else if (!S.Context.hasSameType(ConvType, DestType))
Sequence.AddQualificationConversionStep(DestType, VK_PRValue);
return;
}
// If the conversion following the call to the conversion function
// is interesting, add it as a separate step.
if (Best->FinalConversion.First || Best->FinalConversion.Second ||
Best->FinalConversion.Third) {
ImplicitConversionSequence ICS;
ICS.setStandard();
ICS.Standard = Best->FinalConversion;
Sequence.AddConversionSequenceStep(ICS, DestType, TopLevelOfInitList);
}
}
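// Illustrative sketch (hypothetical types) of the two lookup paths above:
//
//   struct B {};
//   struct A { A(const B &); operator int() const; };
//   B b;
//   A a = b;       // destination is a class: constructors of A are
//                  // enumerated and A::A(const B &) is selected
//   int i = A();   // destination is not a class: conversion functions of A
//                  // are enumerated and A::operator int() is selected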
/// An egregious hack for compatibility with libstdc++-4.2: in <tr1/hashtable>,
/// a function with a pointer return type contains a 'return false;' statement.
/// In C++11, 'false' is not a null pointer, so this breaks the build of any
/// code using that header.
///
/// Work around this by treating 'return false;' as zero-initializing the result
/// if it's used in a pointer-returning function in a system header.
static bool isLibstdcxxPointerReturnFalseHack(Sema &S,
const InitializedEntity &Entity,
const Expr *Init) {
return S.getLangOpts().CPlusPlus11 &&
Entity.getKind() == InitializedEntity::EK_Result &&
Entity.getType()->isPointerType() &&
isa<CXXBoolLiteralExpr>(Init) &&
!cast<CXXBoolLiteralExpr>(Init)->getValue() &&
S.getSourceManager().isInSystemHeader(Init->getExprLoc());
}
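// Illustrative sketch of the accepted pattern (assuming it appears in a
// system header compiled as C++11 or later):
//
//   void *allocate() { return false; }  // 'false' treated as
//                                       // zero-initializing the result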
/// The non-zero enum values here are indexes into diagnostic alternatives.
enum InvalidICRKind { IIK_okay, IIK_nonlocal, IIK_nonscalar };
/// Determines whether this expression is an acceptable ICR source.
static InvalidICRKind isInvalidICRSource(ASTContext &C, Expr *e,
bool isAddressOf, bool &isWeakAccess) {
// Skip parens.
e = e->IgnoreParens();
// Skip address-of nodes.
if (UnaryOperator *op = dyn_cast<UnaryOperator>(e)) {
if (op->getOpcode() == UO_AddrOf)
return isInvalidICRSource(C, op->getSubExpr(), /*addressof*/ true,
isWeakAccess);
// Skip certain casts.
} else if (CastExpr *ce = dyn_cast<CastExpr>(e)) {
switch (ce->getCastKind()) {
case CK_Dependent:
case CK_BitCast:
case CK_LValueBitCast:
case CK_NoOp:
return isInvalidICRSource(C, ce->getSubExpr(), isAddressOf, isWeakAccess);
case CK_ArrayToPointerDecay:
return IIK_nonscalar;
case CK_NullToPointer:
return IIK_okay;
default:
break;
}
// If we have a declaration reference, it had better be a local variable.
} else if (isa<DeclRefExpr>(e)) {
// set isWeakAccess to true, to mean that there will be an implicit
// load which requires a cleanup.
if (e->getType().getObjCLifetime() == Qualifiers::OCL_Weak)
isWeakAccess = true;
if (!isAddressOf) return IIK_nonlocal;
VarDecl *var = dyn_cast<VarDecl>(cast<DeclRefExpr>(e)->getDecl());
if (!var) return IIK_nonlocal;
return (var->hasLocalStorage() ? IIK_okay : IIK_nonlocal);
// If we have a conditional operator, check both sides.
} else if (ConditionalOperator *cond = dyn_cast<ConditionalOperator>(e)) {
if (InvalidICRKind iik = isInvalidICRSource(C, cond->getLHS(), isAddressOf,
isWeakAccess))
return iik;
return isInvalidICRSource(C, cond->getRHS(), isAddressOf, isWeakAccess);
// These are never scalar.
} else if (isa<ArraySubscriptExpr>(e)) {
return IIK_nonscalar;
// Otherwise, it needs to be a null pointer constant.
} else {
return (e->isNullPointerConstant(C, Expr::NPC_ValueDependentIsNull)
? IIK_okay : IIK_nonlocal);
}
return IIK_nonlocal;
}
/// Check whether the given expression is a valid operand for an
/// indirect copy/restore.
static void checkIndirectCopyRestoreSource(Sema &S, Expr *src) {
assert(src->isPRValue());
bool isWeakAccess = false;
InvalidICRKind iik = isInvalidICRSource(S.Context, src, false, isWeakAccess);
// If isWeakAccess is true, there will be an implicit
// load which requires a cleanup.
if (S.getLangOpts().ObjCAutoRefCount && isWeakAccess)
S.Cleanup.setExprNeedsCleanups(true);
if (iik == IIK_okay) return;
S.Diag(src->getExprLoc(), diag::err_arc_nonlocal_writeback)
<< ((unsigned) iik - 1) // shift index into diagnostic explanations
<< src->getSourceRange();
}
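// Illustrative ARC sketch (hypothetical names): '&localVar', where localVar
// has local storage, is an acceptable writeback source; '&globalVar' yields
// IIK_nonlocal, and '&array[idx]' (or an array-to-pointer decay) yields
// IIK_nonscalar, both reported above via err_arc_nonlocal_writeback.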
/// Determine whether we have compatible array types for the
/// purposes of GNU by-copy array initialization.
static bool hasCompatibleArrayTypes(ASTContext &Context, const ArrayType *Dest,
const ArrayType *Source) {
// If the source and destination array types are equivalent, we're
// done.
if (Context.hasSameType(QualType(Dest, 0), QualType(Source, 0)))
return true;
// Make sure that the element types are the same.
if (!Context.hasSameType(Dest->getElementType(), Source->getElementType()))
return false;
// The only mismatch we allow is when the destination is an
// incomplete array type and the source is a constant array type.
return Source->isConstantArrayType() && Dest->isIncompleteArrayType();
}
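// Illustrative sketch of the GNU by-copy array initialization this enables
// in C (arbitrary names; diagnostics as selected by the checks above):
//
//   int a[3] = (int[3]){1, 2, 3};  // same array type: allowed
//   int b[]  = (int[3]){1, 2, 3};  // incomplete destination, constant
//                                  // source: allowed
//   int c[4] = (int[3]){1, 2, 3};  // mismatched bounds: FK_ArrayTypeMismatch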
static bool tryObjCWritebackConversion(Sema &S,
InitializationSequence &Sequence,
const InitializedEntity &Entity,
Expr *Initializer) {
bool ArrayDecay = false;
QualType ArgType = Initializer->getType();
QualType ArgPointee;
if (const ArrayType *ArgArrayType = S.Context.getAsArrayType(ArgType)) {
ArrayDecay = true;
ArgPointee = ArgArrayType->getElementType();
ArgType = S.Context.getPointerType(ArgPointee);
}
// Handle write-back conversion.
QualType ConvertedArgType;
if (!S.isObjCWritebackConversion(ArgType, Entity.getType(),
ConvertedArgType))
return false;
// We should copy unless we're passing to an argument explicitly
// marked 'out'.
bool ShouldCopy = true;
if (ParmVarDecl *param = cast_or_null<ParmVarDecl>(Entity.getDecl()))
ShouldCopy = (param->getObjCDeclQualifier() != ParmVarDecl::OBJC_TQ_Out);
// Do we need an lvalue conversion?
if (ArrayDecay || Initializer->isGLValue()) {
ImplicitConversionSequence ICS;
ICS.setStandard();
ICS.Standard.setAsIdentityConversion();
QualType ResultType;
if (ArrayDecay) {
ICS.Standard.First = ICK_Array_To_Pointer;
ResultType = S.Context.getPointerType(ArgPointee);
} else {
ICS.Standard.First = ICK_Lvalue_To_Rvalue;
ResultType = Initializer->getType().getNonLValueExprType(S.Context);
}
Sequence.AddConversionSequenceStep(ICS, ResultType);
}
Sequence.AddPassByIndirectCopyRestoreStep(Entity.getType(), ShouldCopy);
return true;
}
static bool TryOCLSamplerInitialization(Sema &S,
InitializationSequence &Sequence,
QualType DestType,
Expr *Initializer) {
if (!S.getLangOpts().OpenCL || !DestType->isSamplerT() ||
(!Initializer->isIntegerConstantExpr(S.Context) &&
!Initializer->getType()->isSamplerT()))
return false;
Sequence.AddOCLSamplerInitStep(DestType);
return true;
}
static bool IsZeroInitializer(Expr *Initializer, Sema &S) {
return Initializer->isIntegerConstantExpr(S.getASTContext()) &&
(Initializer->EvaluateKnownConstInt(S.getASTContext()) == 0);
}
static bool TryOCLZeroOpaqueTypeInitialization(Sema &S,
InitializationSequence &Sequence,
QualType DestType,
Expr *Initializer) {
if (!S.getLangOpts().OpenCL)
return false;
//
// OpenCL 1.2 spec, s6.12.10
//
// The event argument can also be used to associate the
// async_work_group_copy with a previous async copy allowing
// an event to be shared by multiple async copies; otherwise
// event should be zero.
//
if (DestType->isEventT() || DestType->isQueueT()) {
if (!IsZeroInitializer(Initializer, S))
return false;
Sequence.AddOCLZeroOpaqueTypeStep(DestType);
return true;
}
// We should allow zero initialization for all types defined in the
// cl_intel_device_side_avc_motion_estimation extension, except
// intel_sub_group_avc_mce_payload_t and intel_sub_group_avc_mce_result_t.
if (S.getOpenCLOptions().isAvailableOption(
"cl_intel_device_side_avc_motion_estimation", S.getLangOpts()) &&
DestType->isOCLIntelSubgroupAVCType()) {
if (DestType->isOCLIntelSubgroupAVCMcePayloadType() ||
DestType->isOCLIntelSubgroupAVCMceResultType())
return false;
if (!IsZeroInitializer(Initializer, S))
return false;
Sequence.AddOCLZeroOpaqueTypeStep(DestType);
return true;
}
return false;
}
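// Illustrative OpenCL sketch of what the zero-initialization helpers above
// accept:
//
//   event_t e = 0;   // OK: opaque type initialized with a zero constant
//   queue_t q = 0;   // OK
//   event_t f = 1;   // not a zero constant: the helper declines
//                    // (returns false)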
InitializationSequence::InitializationSequence(
Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind,
MultiExprArg Args, bool TopLevelOfInitList, bool TreatUnavailableAsInvalid)
: FailedOverloadResult(OR_Success),
FailedCandidateSet(Kind.getLocation(), OverloadCandidateSet::CSK_Normal) {
InitializeFrom(S, Entity, Kind, Args, TopLevelOfInitList,
TreatUnavailableAsInvalid);
}
/// Tries to get a FunctionDecl out of `E`. If it succeeds and we can take the
/// address of that function, this returns true. Otherwise, it returns false.
static bool isExprAnUnaddressableFunction(Sema &S, const Expr *E) {
auto *DRE = dyn_cast<DeclRefExpr>(E);
if (!DRE || !isa<FunctionDecl>(DRE->getDecl()))
return false;
return !S.checkAddressOfFunctionIsAvailable(
cast<FunctionDecl>(DRE->getDecl()));
}
/// Determine whether we can perform an elementwise array copy for this kind
/// of entity.
static bool canPerformArrayCopy(const InitializedEntity &Entity) {
switch (Entity.getKind()) {
case InitializedEntity::EK_LambdaCapture:
// C++ [expr.prim.lambda]p24:
// For array members, the array elements are direct-initialized in
// increasing subscript order.
return true;
case InitializedEntity::EK_Variable:
// C++ [dcl.decomp]p1:
// [...] each element is copy-initialized or direct-initialized from the
// corresponding element of the assignment-expression [...]
return isa<DecompositionDecl>(Entity.getDecl());
case InitializedEntity::EK_Member:
// C++ [class.copy.ctor]p14:
// - if the member is an array, each element is direct-initialized with
// the corresponding subobject of x
return Entity.isImplicitMemberInitializer();
case InitializedEntity::EK_ArrayElement:
// All the above cases are intended to apply recursively, even though none
// of them actually say that.
if (auto *E = Entity.getParent())
return canPerformArrayCopy(*E);
break;
default:
break;
}
return false;
}
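// Illustrative sketch of entities for which elementwise array copy is
// permitted (arbitrary names):
//
//   int src[2] = {1, 2};
//   auto [x, y] = src;       // decomposition: elements copy-initialized
//   auto byCopy = [src] {};  // lambda capture by copy: elements
//                            // direct-initialized in subscript order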
void InitializationSequence::InitializeFrom(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
MultiExprArg Args,
bool TopLevelOfInitList,
bool TreatUnavailableAsInvalid) {
ASTContext &Context = S.Context;
// Eliminate non-overload placeholder types in the arguments. We
// need to do this before checking whether types are dependent
// because lowering a pseudo-object expression might well give us
// something of dependent type.
for (unsigned I = 0, E = Args.size(); I != E; ++I)
if (Args[I]->getType()->isNonOverloadPlaceholderType()) {
// FIXME: should we be doing this here?
ExprResult result = S.CheckPlaceholderExpr(Args[I]);
if (result.isInvalid()) {
SetFailed(FK_PlaceholderType);
return;
}
Args[I] = result.get();
}
// C++0x [dcl.init]p16:
// The semantics of initializers are as follows. The destination type is
// the type of the object or reference being initialized and the source
// type is the type of the initializer expression. The source type is not
// defined when the initializer is a braced-init-list or when it is a
// parenthesized list of expressions.
QualType DestType = Entity.getType();
if (DestType->isDependentType() ||
Expr::hasAnyTypeDependentArguments(Args)) {
SequenceKind = DependentSequence;
return;
}
// Almost everything is a normal sequence.
setSequenceKind(NormalSequence);
QualType SourceType;
Expr *Initializer = nullptr;
if (Args.size() == 1) {
Initializer = Args[0];
if (S.getLangOpts().ObjC) {
if (S.CheckObjCBridgeRelatedConversions(Initializer->getBeginLoc(),
DestType, Initializer->getType(),
Initializer) ||
S.CheckConversionToObjCLiteral(DestType, Initializer))
Args[0] = Initializer;
}
if (!isa<InitListExpr>(Initializer))
SourceType = Initializer->getType();
}
// - If the initializer is a (non-parenthesized) braced-init-list, the
// object is list-initialized (8.5.4).
if (Kind.getKind() != InitializationKind::IK_Direct) {
if (InitListExpr *InitList = dyn_cast_or_null<InitListExpr>(Initializer)) {
TryListInitialization(S, Entity, Kind, InitList, *this,
TreatUnavailableAsInvalid);
return;
}
}
// - If the destination type is a reference type, see 8.5.3.
if (DestType->isReferenceType()) {
// C++0x [dcl.init.ref]p1:
// A variable declared to be a T& or T&&, that is, "reference to type T"
// (8.3.2), shall be initialized by an object, or function, of type T or
// by an object that can be converted into a T.
// (Therefore, multiple arguments are not permitted.)
if (Args.size() != 1)
SetFailed(FK_TooManyInitsForReference);
// C++17 [dcl.init.ref]p5:
// A reference [...] is initialized by an expression [...] as follows:
// If the initializer is not an expression, presumably we should reject,
// but the standard fails to actually say so.
else if (isa<InitListExpr>(Args[0]))
SetFailed(FK_ParenthesizedListInitForReference);
else
TryReferenceInitialization(S, Entity, Kind, Args[0], *this);
return;
}
// - If the initializer is (), the object is value-initialized.
if (Kind.getKind() == InitializationKind::IK_Value ||
(Kind.getKind() == InitializationKind::IK_Direct && Args.empty())) {
TryValueInitialization(S, Entity, Kind, *this);
return;
}
// Handle default initialization.
if (Kind.getKind() == InitializationKind::IK_Default) {
TryDefaultInitialization(S, Entity, Kind, *this);
return;
}
// - If the destination type is an array of characters, an array of
// char16_t, an array of char32_t, or an array of wchar_t, and the
// initializer is a string literal, see 8.5.2.
// - Otherwise, if the destination type is an array, the program is
// ill-formed.
if (const ArrayType *DestAT = Context.getAsArrayType(DestType)) {
if (Initializer && isa<VariableArrayType>(DestAT)) {
SetFailed(FK_VariableLengthArrayHasInitializer);
return;
}
if (Initializer) {
switch (IsStringInit(Initializer, DestAT, Context)) {
case SIF_None:
TryStringLiteralInitialization(S, Entity, Kind, Initializer, *this);
return;
case SIF_NarrowStringIntoWideChar:
SetFailed(FK_NarrowStringIntoWideCharArray);
return;
case SIF_WideStringIntoChar:
SetFailed(FK_WideStringIntoCharArray);
return;
case SIF_IncompatWideStringIntoWideChar:
SetFailed(FK_IncompatWideStringIntoWideChar);
return;
case SIF_PlainStringIntoUTF8Char:
SetFailed(FK_PlainStringIntoUTF8Char);
return;
case SIF_UTF8StringIntoPlainChar:
SetFailed(FK_UTF8StringIntoPlainChar);
return;
case SIF_Other:
break;
}
}
// Some kinds of initialization permit an array to be initialized from
// another array of the same type, and perform elementwise initialization.
if (Initializer && isa<ConstantArrayType>(DestAT) &&
S.Context.hasSameUnqualifiedType(Initializer->getType(),
Entity.getType()) &&
canPerformArrayCopy(Entity)) {
// If source is a prvalue, use it directly.
if (Initializer->isPRValue()) {
AddArrayInitStep(DestType, /*IsGNUExtension*/false);
return;
}
// Emit element-at-a-time copy loop.
InitializedEntity Element =
InitializedEntity::InitializeElement(S.Context, 0, Entity);
QualType InitEltT =
Context.getAsArrayType(Initializer->getType())->getElementType();
OpaqueValueExpr OVE(Initializer->getExprLoc(), InitEltT,
Initializer->getValueKind(),
Initializer->getObjectKind());
Expr *OVEAsExpr = &OVE;
InitializeFrom(S, Element, Kind, OVEAsExpr, TopLevelOfInitList,
TreatUnavailableAsInvalid);
if (!Failed())
AddArrayInitLoopStep(Entity.getType(), InitEltT);
return;
}
// Note: as a GNU C extension, we allow initialization of an
// array from a compound literal that creates an array of the same
// type, so long as the initializer has no side effects.
if (!S.getLangOpts().CPlusPlus && Initializer &&
isa<CompoundLiteralExpr>(Initializer->IgnoreParens()) &&
Initializer->getType()->isArrayType()) {
const ArrayType *SourceAT
= Context.getAsArrayType(Initializer->getType());
if (!hasCompatibleArrayTypes(S.Context, DestAT, SourceAT))
SetFailed(FK_ArrayTypeMismatch);
else if (Initializer->HasSideEffects(S.Context))
SetFailed(FK_NonConstantArrayInit);
else {
AddArrayInitStep(DestType, /*IsGNUExtension*/true);
}
}
// Note: as a GNU C++ extension, we allow list-initialization of a
// class member of array type from a parenthesized initializer list.
else if (S.getLangOpts().CPlusPlus &&
Entity.getKind() == InitializedEntity::EK_Member &&
Initializer && isa<InitListExpr>(Initializer)) {
TryListInitialization(S, Entity, Kind, cast<InitListExpr>(Initializer),
*this, TreatUnavailableAsInvalid);
AddParenthesizedArrayInitStep(DestType);
} else if (S.getLangOpts().CPlusPlus20 && !TopLevelOfInitList &&
Kind.getKind() == InitializationKind::IK_Direct)
TryOrBuildParenListInitialization(S, Entity, Kind, Args, *this,
/*VerifyOnly=*/true);
else if (DestAT->getElementType()->isCharType())
SetFailed(FK_ArrayNeedsInitListOrStringLiteral);
else if (IsWideCharCompatible(DestAT->getElementType(), Context))
SetFailed(FK_ArrayNeedsInitListOrWideStringLiteral);
else
SetFailed(FK_ArrayNeedsInitList);
return;
}
// Determine whether we should consider writeback conversions for
// Objective-C ARC.
bool allowObjCWritebackConversion = S.getLangOpts().ObjCAutoRefCount &&
Entity.isParameterKind();
if (TryOCLSamplerInitialization(S, *this, DestType, Initializer))
return;
// We're at the end of the line for C: it's either a write-back conversion
// or it's a C assignment. There's no need to check anything else.
if (!S.getLangOpts().CPlusPlus) {
// If allowed, check whether this is an Objective-C writeback conversion.
if (allowObjCWritebackConversion &&
tryObjCWritebackConversion(S, *this, Entity, Initializer)) {
return;
}
if (TryOCLZeroOpaqueTypeInitialization(S, *this, DestType, Initializer))
return;
// Handle initialization in C
AddCAssignmentStep(DestType);
MaybeProduceObjCObject(S, *this, Entity);
return;
}
assert(S.getLangOpts().CPlusPlus);
// - If the destination type is a (possibly cv-qualified) class type:
if (DestType->isRecordType()) {
// - If the initialization is direct-initialization, or if it is
// copy-initialization where the cv-unqualified version of the
// source type is the same class as, or a derived class of, the
// class of the destination, constructors are considered. [...]
if (Kind.getKind() == InitializationKind::IK_Direct ||
(Kind.getKind() == InitializationKind::IK_Copy &&
(Context.hasSameUnqualifiedType(SourceType, DestType) ||
S.IsDerivedFrom(Initializer->getBeginLoc(), SourceType, DestType)))) {
TryConstructorInitialization(S, Entity, Kind, Args, DestType, DestType,
*this);
// We fall back to the "no matching constructor" path if the
// failed candidate set has functions other than the three default
// constructors. For example, conversion function.
if (const auto *RD =
dyn_cast<CXXRecordDecl>(DestType->getAs<RecordType>()->getDecl());
// In general, we should call isCompleteType for RD to check its
// completeness; we don't call it here as it was already called in
// TryConstructorInitialization above.
S.getLangOpts().CPlusPlus20 && RD && RD->hasDefinition() &&
RD->isAggregate() && Failed() &&
getFailureKind() == FK_ConstructorOverloadFailed) {
// Do not attempt paren list initialization if overload resolution
// resolves to a deleted function.
//
// We may reach this condition if we have a union wrapping a class with
// a non-trivial copy or move constructor and we call one of those two
// constructors. The union is an aggregate, but the matched constructor
// is implicitly deleted, so we need to prevent aggregate initialization
// (otherwise, it'll attempt aggregate initialization by initializing
// the first element with a reference to the union).
OverloadCandidateSet::iterator Best;
OverloadingResult OR = getFailedCandidateSet().BestViableFunction(
S, Kind.getLocation(), Best);
if (OR != OverloadingResult::OR_Deleted) {
// C++20 [dcl.init] 17.6.2.2:
//   - Otherwise, if no constructor is viable, the destination type is an
//     aggregate class, and the initializer is a parenthesized
//     expression-list.
TryOrBuildParenListInitialization(S, Entity, Kind, Args, *this,
/*VerifyOnly=*/true);
}
}
} else {
// - Otherwise (i.e., for the remaining copy-initialization cases),
// user-defined conversion sequences that can convert from the
// source type to the destination type or (when a conversion
// function is used) to a derived class thereof are enumerated as
// described in 13.3.1.4, and the best one is chosen through
// overload resolution (13.3).
TryUserDefinedConversion(S, DestType, Kind, Initializer, *this,
TopLevelOfInitList);
}
return;
}
assert(Args.size() >= 1 && "Zero-argument case handled above");
// For HLSL ext vector types we allow list initialization behavior for C++
// constructor syntax. This is accomplished by converting initialization
// arguments to an InitListExpr late.
if (S.getLangOpts().HLSL && DestType->isExtVectorType() &&
(SourceType.isNull() ||
!Context.hasSameUnqualifiedType(SourceType, DestType))) {
llvm::SmallVector<Expr *> InitArgs;
for (auto *Arg : Args) {
if (Arg->getType()->isExtVectorType()) {
const auto *VTy = Arg->getType()->castAs<ExtVectorType>();
unsigned Elm = VTy->getNumElements();
for (unsigned Idx = 0; Idx < Elm; ++Idx) {
InitArgs.emplace_back(new (Context) ArraySubscriptExpr(
Arg,
IntegerLiteral::Create(
Context, llvm::APInt(Context.getIntWidth(Context.IntTy), Idx),
Context.IntTy, SourceLocation()),
VTy->getElementType(), Arg->getValueKind(), Arg->getObjectKind(),
SourceLocation()));
}
} else
InitArgs.emplace_back(Arg);
}
InitListExpr *ILE = new (Context) InitListExpr(
S.getASTContext(), SourceLocation(), InitArgs, SourceLocation());
Args[0] = ILE;
AddListInitializationStep(DestType);
return;
}
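// Illustrative HLSL sketch (hypothetical names): constructor-style syntax
// such as 'float4 v = float4(xy, 0.0, 1.0);' (xy being a float2) is handled
// by splitting xy into per-element subscripts and wrapping all of the
// arguments in an InitListExpr, as done above.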
// The remaining cases all need a source type.
if (Args.size() > 1) {
SetFailed(FK_TooManyInitsForScalar);
return;
} else if (isa<InitListExpr>(Args[0])) {
SetFailed(FK_ParenthesizedListInitForScalar);
return;
}
// - Otherwise, if the source type is a (possibly cv-qualified) class
// type, conversion functions are considered.
if (!SourceType.isNull() && SourceType->isRecordType()) {
// For a conversion to _Atomic(T) from either T or a class type derived
// from T, initialize the T object then convert to _Atomic type.
bool NeedAtomicConversion = false;
if (const AtomicType *Atomic = DestType->getAs<AtomicType>()) {
if (Context.hasSameUnqualifiedType(SourceType, Atomic->getValueType()) ||
S.IsDerivedFrom(Initializer->getBeginLoc(), SourceType,
Atomic->getValueType())) {
DestType = Atomic->getValueType();
NeedAtomicConversion = true;
}
}
TryUserDefinedConversion(S, DestType, Kind, Initializer, *this,
TopLevelOfInitList);
MaybeProduceObjCObject(S, *this, Entity);
if (!Failed() && NeedAtomicConversion)
AddAtomicConversionStep(Entity.getType());
return;
}
// - Otherwise, if the initialization is direct-initialization, the source
// type is std::nullptr_t, and the destination type is bool, the initial
// value of the object being initialized is false.
if (!SourceType.isNull() && SourceType->isNullPtrType() &&
DestType->isBooleanType() &&
Kind.getKind() == InitializationKind::IK_Direct) {
AddConversionSequenceStep(
ImplicitConversionSequence::getNullptrToBool(SourceType, DestType,
Initializer->isGLValue()),
DestType);
return;
}
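// e.g. (illustrative): 'bool b(nullptr);' takes this path and yields false;
// copy-initialization such as 'bool b = nullptr;' does not, since the
// conversion is only permitted for direct-initialization.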
// - Otherwise, the initial value of the object being initialized is the
// (possibly converted) value of the initializer expression. Standard
// conversions (Clause 4) will be used, if necessary, to convert the
// initializer expression to the cv-unqualified version of the
// destination type; no user-defined conversions are considered.
ImplicitConversionSequence ICS
= S.TryImplicitConversion(Initializer, DestType,
/*SuppressUserConversions*/true,
Sema::AllowedExplicit::None,
/*InOverloadResolution*/ false,
/*CStyle=*/Kind.isCStyleOrFunctionalCast(),
allowObjCWritebackConversion);
if (ICS.isStandard() &&
ICS.Standard.Second == ICK_Writeback_Conversion) {
// Objective-C ARC writeback conversion.
// We should copy unless we're passing to an argument explicitly
// marked 'out'.
bool ShouldCopy = true;
if (ParmVarDecl *Param = cast_or_null<ParmVarDecl>(Entity.getDecl()))
ShouldCopy = (Param->getObjCDeclQualifier() != ParmVarDecl::OBJC_TQ_Out);
// If there was an lvalue adjustment, add it as a separate conversion.
if (ICS.Standard.First == ICK_Array_To_Pointer ||
ICS.Standard.First == ICK_Lvalue_To_Rvalue) {
ImplicitConversionSequence LvalueICS;
LvalueICS.setStandard();
LvalueICS.Standard.setAsIdentityConversion();
LvalueICS.Standard.setAllToTypes(ICS.Standard.getToType(0));
LvalueICS.Standard.First = ICS.Standard.First;
AddConversionSequenceStep(LvalueICS, ICS.Standard.getToType(0));
}
AddPassByIndirectCopyRestoreStep(DestType, ShouldCopy);
} else if (ICS.isBad()) {
DeclAccessPair dap;
if (isLibstdcxxPointerReturnFalseHack(S, Entity, Initializer)) {
AddZeroInitializationStep(Entity.getType());
} else if (Initializer->getType() == Context.OverloadTy &&
!S.ResolveAddressOfOverloadedFunction(Initializer, DestType,
false, dap))
SetFailed(InitializationSequence::FK_AddressOfOverloadFailed);
else if (Initializer->getType()->isFunctionType() &&
isExprAnUnaddressableFunction(S, Initializer))
SetFailed(InitializationSequence::FK_AddressOfUnaddressableFunction);
else
SetFailed(InitializationSequence::FK_ConversionFailed);
} else {
AddConversionSequenceStep(ICS, DestType, TopLevelOfInitList);
MaybeProduceObjCObject(S, *this, Entity);
}
}
InitializationSequence::~InitializationSequence() {
for (auto &S : Steps)
S.Destroy();
}
//===----------------------------------------------------------------------===//
// Perform initialization
//===----------------------------------------------------------------------===//
static Sema::AssignmentAction
getAssignmentAction(const InitializedEntity &Entity, bool Diagnose = false) {
switch(Entity.getKind()) {
case InitializedEntity::EK_Variable:
case InitializedEntity::EK_New:
case InitializedEntity::EK_Exception:
case InitializedEntity::EK_Base:
case InitializedEntity::EK_Delegating:
return Sema::AA_Initializing;
case InitializedEntity::EK_Parameter:
if (Entity.getDecl() &&
isa<ObjCMethodDecl>(Entity.getDecl()->getDeclContext()))
return Sema::AA_Sending;
return Sema::AA_Passing;
case InitializedEntity::EK_Parameter_CF_Audited:
if (Entity.getDecl() &&
isa<ObjCMethodDecl>(Entity.getDecl()->getDeclContext()))
return Sema::AA_Sending;
return !Diagnose ? Sema::AA_Passing : Sema::AA_Passing_CFAudited;
case InitializedEntity::EK_Result:
case InitializedEntity::EK_StmtExprResult: // FIXME: Not quite right.
return Sema::AA_Returning;
case InitializedEntity::EK_Temporary:
case InitializedEntity::EK_RelatedResult:
// FIXME: Can we tell apart casting vs. converting?
return Sema::AA_Casting;
case InitializedEntity::EK_TemplateParameter:
// This is really initialization, but refer to it as conversion for
// consistency with CheckConvertedConstantExpression.
return Sema::AA_Converting;
case InitializedEntity::EK_Member:
+ case InitializedEntity::EK_ParenAggInitMember:
case InitializedEntity::EK_Binding:
case InitializedEntity::EK_ArrayElement:
case InitializedEntity::EK_VectorElement:
case InitializedEntity::EK_ComplexElement:
case InitializedEntity::EK_BlockElement:
case InitializedEntity::EK_LambdaToBlockConversionBlockElement:
case InitializedEntity::EK_LambdaCapture:
case InitializedEntity::EK_CompoundLiteralInit:
return Sema::AA_Initializing;
}
llvm_unreachable("Invalid EntityKind!");
}
/// Whether we should bind a created object as a temporary when
/// initializing the given entity.
static bool shouldBindAsTemporary(const InitializedEntity &Entity) {
switch (Entity.getKind()) {
case InitializedEntity::EK_ArrayElement:
case InitializedEntity::EK_Member:
+ case InitializedEntity::EK_ParenAggInitMember:
case InitializedEntity::EK_Result:
case InitializedEntity::EK_StmtExprResult:
case InitializedEntity::EK_New:
case InitializedEntity::EK_Variable:
case InitializedEntity::EK_Base:
case InitializedEntity::EK_Delegating:
case InitializedEntity::EK_VectorElement:
case InitializedEntity::EK_ComplexElement:
case InitializedEntity::EK_Exception:
case InitializedEntity::EK_BlockElement:
case InitializedEntity::EK_LambdaToBlockConversionBlockElement:
case InitializedEntity::EK_LambdaCapture:
case InitializedEntity::EK_CompoundLiteralInit:
case InitializedEntity::EK_TemplateParameter:
return false;
case InitializedEntity::EK_Parameter:
case InitializedEntity::EK_Parameter_CF_Audited:
case InitializedEntity::EK_Temporary:
case InitializedEntity::EK_RelatedResult:
case InitializedEntity::EK_Binding:
return true;
}
llvm_unreachable("missed an InitializedEntity kind?");
}
/// Whether the given entity, when initialized with an object
/// created for that initialization, requires destruction.
static bool shouldDestroyEntity(const InitializedEntity &Entity) {
switch (Entity.getKind()) {
case InitializedEntity::EK_Result:
case InitializedEntity::EK_StmtExprResult:
case InitializedEntity::EK_New:
case InitializedEntity::EK_Base:
case InitializedEntity::EK_Delegating:
case InitializedEntity::EK_VectorElement:
case InitializedEntity::EK_ComplexElement:
case InitializedEntity::EK_BlockElement:
case InitializedEntity::EK_LambdaToBlockConversionBlockElement:
case InitializedEntity::EK_LambdaCapture:
return false;
case InitializedEntity::EK_Member:
+ case InitializedEntity::EK_ParenAggInitMember:
case InitializedEntity::EK_Binding:
case InitializedEntity::EK_Variable:
case InitializedEntity::EK_Parameter:
case InitializedEntity::EK_Parameter_CF_Audited:
case InitializedEntity::EK_TemplateParameter:
case InitializedEntity::EK_Temporary:
case InitializedEntity::EK_ArrayElement:
case InitializedEntity::EK_Exception:
case InitializedEntity::EK_CompoundLiteralInit:
case InitializedEntity::EK_RelatedResult:
return true;
}
llvm_unreachable("missed an InitializedEntity kind?");
}
/// Get the location at which initialization diagnostics should appear.
static SourceLocation getInitializationLoc(const InitializedEntity &Entity,
Expr *Initializer) {
switch (Entity.getKind()) {
case InitializedEntity::EK_Result:
case InitializedEntity::EK_StmtExprResult:
return Entity.getReturnLoc();
case InitializedEntity::EK_Exception:
return Entity.getThrowLoc();
case InitializedEntity::EK_Variable:
case InitializedEntity::EK_Binding:
return Entity.getDecl()->getLocation();
case InitializedEntity::EK_LambdaCapture:
return Entity.getCaptureLoc();
case InitializedEntity::EK_ArrayElement:
case InitializedEntity::EK_Member:
+ case InitializedEntity::EK_ParenAggInitMember:
case InitializedEntity::EK_Parameter:
case InitializedEntity::EK_Parameter_CF_Audited:
case InitializedEntity::EK_TemplateParameter:
case InitializedEntity::EK_Temporary:
case InitializedEntity::EK_New:
case InitializedEntity::EK_Base:
case InitializedEntity::EK_Delegating:
case InitializedEntity::EK_VectorElement:
case InitializedEntity::EK_ComplexElement:
case InitializedEntity::EK_BlockElement:
case InitializedEntity::EK_LambdaToBlockConversionBlockElement:
case InitializedEntity::EK_CompoundLiteralInit:
case InitializedEntity::EK_RelatedResult:
return Initializer->getBeginLoc();
}
llvm_unreachable("missed an InitializedEntity kind?");
}
/// Make a (potentially elidable) temporary copy of the object
/// provided by the given initializer by calling the appropriate copy
/// constructor.
///
/// \param S The Sema object used for type-checking.
///
/// \param T The type of the temporary object, which must either be
/// the type of the initializer expression or a superclass thereof.
///
/// \param Entity The entity being initialized.
///
/// \param CurInit The initializer expression.
///
/// \param IsExtraneousCopy Whether this is an "extraneous" copy that
/// is permitted in C++03 (but not C++0x) when binding a reference to
/// an rvalue.
///
/// \returns An expression that copies the initializer expression into
/// a temporary object, or an error expression if a copy could not be
/// created.
static ExprResult CopyObject(Sema &S,
QualType T,
const InitializedEntity &Entity,
ExprResult CurInit,
bool IsExtraneousCopy) {
if (CurInit.isInvalid())
return CurInit;
// Determine which class type we're copying to.
Expr *CurInitExpr = (Expr *)CurInit.get();
CXXRecordDecl *Class = nullptr;
if (const RecordType *Record = T->getAs<RecordType>())
Class = cast<CXXRecordDecl>(Record->getDecl());
if (!Class)
return CurInit;
SourceLocation Loc = getInitializationLoc(Entity, CurInit.get());
// Make sure that the type we are copying is complete.
if (S.RequireCompleteType(Loc, T, diag::err_temp_copy_incomplete))
return CurInit;
// Perform overload resolution using the class's constructors. Per
// C++11 [dcl.init]p16, second bullet for class types, this initialization
// is direct-initialization.
OverloadCandidateSet CandidateSet(Loc, OverloadCandidateSet::CSK_Normal);
DeclContext::lookup_result Ctors = S.LookupConstructors(Class);
OverloadCandidateSet::iterator Best;
switch (ResolveConstructorOverload(
S, Loc, CurInitExpr, CandidateSet, T, Ctors, Best,
/*CopyInitializing=*/false, /*AllowExplicit=*/true,
/*OnlyListConstructors=*/false, /*IsListInit=*/false,
/*SecondStepOfCopyInit=*/true)) {
case OR_Success:
break;
case OR_No_Viable_Function:
CandidateSet.NoteCandidates(
PartialDiagnosticAt(
Loc, S.PDiag(IsExtraneousCopy && !S.isSFINAEContext()
? diag::ext_rvalue_to_reference_temp_copy_no_viable
: diag::err_temp_copy_no_viable)
<< (int)Entity.getKind() << CurInitExpr->getType()
<< CurInitExpr->getSourceRange()),
S, OCD_AllCandidates, CurInitExpr);
if (!IsExtraneousCopy || S.isSFINAEContext())
return ExprError();
return CurInit;
case OR_Ambiguous:
CandidateSet.NoteCandidates(
PartialDiagnosticAt(Loc, S.PDiag(diag::err_temp_copy_ambiguous)
<< (int)Entity.getKind()
<< CurInitExpr->getType()
<< CurInitExpr->getSourceRange()),
S, OCD_AmbiguousCandidates, CurInitExpr);
return ExprError();
case OR_Deleted:
S.Diag(Loc, diag::err_temp_copy_deleted)
<< (int)Entity.getKind() << CurInitExpr->getType()
<< CurInitExpr->getSourceRange();
S.NoteDeletedFunction(Best->Function);
return ExprError();
}
bool HadMultipleCandidates = CandidateSet.size() > 1;
CXXConstructorDecl *Constructor = cast<CXXConstructorDecl>(Best->Function);
SmallVector<Expr*, 8> ConstructorArgs;
CurInit.get(); // Ownership transferred into MultiExprArg, below.
S.CheckConstructorAccess(Loc, Constructor, Best->FoundDecl, Entity,
IsExtraneousCopy);
if (IsExtraneousCopy) {
// If this is a totally extraneous copy for C++03 reference
// binding purposes, just return the original initialization
// expression. We don't generate an (elided) copy operation here
// because doing so would require us to pass down a flag to avoid
// infinite recursion, where each step adds another extraneous,
// elidable copy.
// Instantiate the default arguments of any extra parameters in
// the selected copy constructor, as if we were going to create a
// proper call to the copy constructor.
for (unsigned I = 1, N = Constructor->getNumParams(); I != N; ++I) {
ParmVarDecl *Parm = Constructor->getParamDecl(I);
if (S.RequireCompleteType(Loc, Parm->getType(),
diag::err_call_incomplete_argument))
break;
// Build the default argument expression; we don't actually care
// if this succeeds or not, because this routine will complain
// if there was a problem.
S.BuildCXXDefaultArgExpr(Loc, Constructor, Parm);
}
return CurInitExpr;
}
// Determine the arguments required to actually perform the
// constructor call (we might have derived-to-base conversions, or
// the copy constructor may have default arguments).
if (S.CompleteConstructorCall(Constructor, T, CurInitExpr, Loc,
ConstructorArgs))
return ExprError();
// C++0x [class.copy]p32:
// When certain criteria are met, an implementation is allowed to
// omit the copy/move construction of a class object, even if the
// copy/move constructor and/or destructor for the object have
// side effects. [...]
// - when a temporary class object that has not been bound to a
// reference (12.2) would be copied/moved to a class object
// with the same cv-unqualified type, the copy/move operation
// can be omitted by constructing the temporary object
// directly into the target of the omitted copy/move
//
// Note that the other three bullets are handled elsewhere. Copy
// elision for return statements and throw expressions are handled as part
// of constructor initialization, while copy elision for exception handlers
// is handled by the run-time.
//
// FIXME: If the function parameter is not the same type as the temporary, we
// should still be able to elide the copy, but we don't have a way to
// represent in the AST how much should be elided in this case.
bool Elidable =
CurInitExpr->isTemporaryObject(S.Context, Class) &&
S.Context.hasSameUnqualifiedType(
Best->Function->getParamDecl(0)->getType().getNonReferenceType(),
CurInitExpr->getType());
// Actually perform the constructor call.
CurInit = S.BuildCXXConstructExpr(Loc, T, Best->FoundDecl, Constructor,
Elidable,
ConstructorArgs,
HadMultipleCandidates,
/*ListInit*/ false,
/*StdInitListInit*/ false,
/*ZeroInit*/ false,
CXXConstructExpr::CK_Complete,
SourceRange());
// If we're supposed to bind temporaries, do so.
if (!CurInit.isInvalid() && shouldBindAsTemporary(Entity))
CurInit = S.MaybeBindToTemporary(CurInit.getAs<Expr>());
return CurInit;
}
/// Check whether elidable copy construction for binding a reference to
/// a temporary would have succeeded if we were building in C++98 mode, for
/// -Wc++98-compat.
static void CheckCXX98CompatAccessibleCopy(Sema &S,
const InitializedEntity &Entity,
Expr *CurInitExpr) {
assert(S.getLangOpts().CPlusPlus11);
const RecordType *Record = CurInitExpr->getType()->getAs<RecordType>();
if (!Record)
return;
SourceLocation Loc = getInitializationLoc(Entity, CurInitExpr);
if (S.Diags.isIgnored(diag::warn_cxx98_compat_temp_copy, Loc))
return;
// Find constructors which would have been considered.
OverloadCandidateSet CandidateSet(Loc, OverloadCandidateSet::CSK_Normal);
DeclContext::lookup_result Ctors =
S.LookupConstructors(cast<CXXRecordDecl>(Record->getDecl()));
// Perform overload resolution.
OverloadCandidateSet::iterator Best;
OverloadingResult OR = ResolveConstructorOverload(
S, Loc, CurInitExpr, CandidateSet, CurInitExpr->getType(), Ctors, Best,
/*CopyInitializing=*/false, /*AllowExplicit=*/true,
/*OnlyListConstructors=*/false, /*IsListInit=*/false,
/*SecondStepOfCopyInit=*/true);
PartialDiagnostic Diag = S.PDiag(diag::warn_cxx98_compat_temp_copy)
<< OR << (int)Entity.getKind() << CurInitExpr->getType()
<< CurInitExpr->getSourceRange();
switch (OR) {
case OR_Success:
S.CheckConstructorAccess(Loc, cast<CXXConstructorDecl>(Best->Function),
Best->FoundDecl, Entity, Diag);
// FIXME: Check default arguments as far as that's possible.
break;
case OR_No_Viable_Function:
CandidateSet.NoteCandidates(PartialDiagnosticAt(Loc, Diag), S,
OCD_AllCandidates, CurInitExpr);
break;
case OR_Ambiguous:
CandidateSet.NoteCandidates(PartialDiagnosticAt(Loc, Diag), S,
OCD_AmbiguousCandidates, CurInitExpr);
break;
case OR_Deleted:
S.Diag(Loc, Diag);
S.NoteDeletedFunction(Best->Function);
break;
}
}
void InitializationSequence::PrintInitLocationNote(Sema &S,
const InitializedEntity &Entity) {
if (Entity.isParamOrTemplateParamKind() && Entity.getDecl()) {
if (Entity.getDecl()->getLocation().isInvalid())
return;
if (Entity.getDecl()->getDeclName())
S.Diag(Entity.getDecl()->getLocation(), diag::note_parameter_named_here)
<< Entity.getDecl()->getDeclName();
else
S.Diag(Entity.getDecl()->getLocation(), diag::note_parameter_here);
}
else if (Entity.getKind() == InitializedEntity::EK_RelatedResult &&
Entity.getMethodDecl())
S.Diag(Entity.getMethodDecl()->getLocation(),
diag::note_method_return_type_change)
<< Entity.getMethodDecl()->getDeclName();
}
/// Returns true if the parameters describe a constructor initialization of
/// an explicit temporary object, e.g. "Point(x, y)".
static bool isExplicitTemporary(const InitializedEntity &Entity,
const InitializationKind &Kind,
unsigned NumArgs) {
switch (Entity.getKind()) {
case InitializedEntity::EK_Temporary:
case InitializedEntity::EK_CompoundLiteralInit:
case InitializedEntity::EK_RelatedResult:
break;
default:
return false;
}
switch (Kind.getKind()) {
case InitializationKind::IK_DirectList:
return true;
// FIXME: Hack to work around cast weirdness.
case InitializationKind::IK_Direct:
case InitializationKind::IK_Value:
return NumArgs != 1;
default:
return false;
}
}
static ExprResult
PerformConstructorInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
MultiExprArg Args,
const InitializationSequence::Step& Step,
bool &ConstructorInitRequiresZeroInit,
bool IsListInitialization,
bool IsStdInitListInitialization,
SourceLocation LBraceLoc,
SourceLocation RBraceLoc) {
unsigned NumArgs = Args.size();
CXXConstructorDecl *Constructor
= cast<CXXConstructorDecl>(Step.Function.Function);
bool HadMultipleCandidates = Step.Function.HadMultipleCandidates;
// Build a call to the selected constructor.
SmallVector<Expr*, 8> ConstructorArgs;
SourceLocation Loc = (Kind.isCopyInit() && Kind.getEqualLoc().isValid())
? Kind.getEqualLoc()
: Kind.getLocation();
if (Kind.getKind() == InitializationKind::IK_Default) {
// Force even a trivial, implicit default constructor to be
// semantically checked. We do this explicitly because we don't build
// the definition for completely trivial constructors.
assert(Constructor->getParent() && "No parent class for constructor.");
if (Constructor->isDefaulted() && Constructor->isDefaultConstructor() &&
Constructor->isTrivial() && !Constructor->isUsed(false)) {
S.runWithSufficientStackSpace(Loc, [&] {
S.DefineImplicitDefaultConstructor(Loc, Constructor);
});
}
}
ExprResult CurInit((Expr *)nullptr);
// C++ [over.match.copy]p1:
// - When initializing a temporary to be bound to the first parameter
// of a constructor that takes a reference to possibly cv-qualified
// T as its first argument, called with a single argument in the
// context of direct-initialization, explicit conversion functions
// are also considered.
bool AllowExplicitConv =
Kind.AllowExplicit() && !Kind.isCopyInit() && Args.size() == 1 &&
hasCopyOrMoveCtorParam(S.Context,
getConstructorInfo(Step.Function.FoundDecl));
// Determine the arguments required to actually perform the constructor
// call.
if (S.CompleteConstructorCall(Constructor, Step.Type, Args, Loc,
ConstructorArgs, AllowExplicitConv,
IsListInitialization))
return ExprError();
if (isExplicitTemporary(Entity, Kind, NumArgs)) {
// An explicitly-constructed temporary, e.g., X(1, 2).
if (S.DiagnoseUseOfDecl(Constructor, Loc))
return ExprError();
TypeSourceInfo *TSInfo = Entity.getTypeSourceInfo();
if (!TSInfo)
TSInfo = S.Context.getTrivialTypeSourceInfo(Entity.getType(), Loc);
SourceRange ParenOrBraceRange =
(Kind.getKind() == InitializationKind::IK_DirectList)
? SourceRange(LBraceLoc, RBraceLoc)
: Kind.getParenOrBraceRange();
CXXConstructorDecl *CalleeDecl = Constructor;
if (auto *Shadow = dyn_cast<ConstructorUsingShadowDecl>(
Step.Function.FoundDecl.getDecl())) {
CalleeDecl = S.findInheritingConstructor(Loc, Constructor, Shadow);
if (S.DiagnoseUseOfDecl(CalleeDecl, Loc))
return ExprError();
}
S.MarkFunctionReferenced(Loc, CalleeDecl);
CurInit = S.CheckForImmediateInvocation(
CXXTemporaryObjectExpr::Create(
S.Context, CalleeDecl,
Entity.getType().getNonLValueExprType(S.Context), TSInfo,
ConstructorArgs, ParenOrBraceRange, HadMultipleCandidates,
IsListInitialization, IsStdInitListInitialization,
ConstructorInitRequiresZeroInit),
CalleeDecl);
} else {
CXXConstructExpr::ConstructionKind ConstructKind =
CXXConstructExpr::CK_Complete;
if (Entity.getKind() == InitializedEntity::EK_Base) {
ConstructKind = Entity.getBaseSpecifier()->isVirtual() ?
CXXConstructExpr::CK_VirtualBase :
CXXConstructExpr::CK_NonVirtualBase;
} else if (Entity.getKind() == InitializedEntity::EK_Delegating) {
ConstructKind = CXXConstructExpr::CK_Delegating;
}
// Only get the parenthesis or brace range if it is a list initialization or
// direct construction.
SourceRange ParenOrBraceRange;
if (IsListInitialization)
ParenOrBraceRange = SourceRange(LBraceLoc, RBraceLoc);
else if (Kind.getKind() == InitializationKind::IK_Direct)
ParenOrBraceRange = Kind.getParenOrBraceRange();
// If the entity allows NRVO, mark the construction as elidable
// unconditionally.
if (Entity.allowsNRVO())
CurInit = S.BuildCXXConstructExpr(Loc, Step.Type,
Step.Function.FoundDecl,
Constructor, /*Elidable=*/true,
ConstructorArgs,
HadMultipleCandidates,
IsListInitialization,
IsStdInitListInitialization,
ConstructorInitRequiresZeroInit,
ConstructKind,
ParenOrBraceRange);
else
CurInit = S.BuildCXXConstructExpr(Loc, Step.Type,
Step.Function.FoundDecl,
Constructor,
ConstructorArgs,
HadMultipleCandidates,
IsListInitialization,
IsStdInitListInitialization,
ConstructorInitRequiresZeroInit,
ConstructKind,
ParenOrBraceRange);
}
if (CurInit.isInvalid())
return ExprError();
// Only check access if all of that succeeded.
S.CheckConstructorAccess(Loc, Constructor, Step.Function.FoundDecl, Entity);
if (S.DiagnoseUseOfDecl(Step.Function.FoundDecl, Loc))
return ExprError();
if (const ArrayType *AT = S.Context.getAsArrayType(Entity.getType()))
if (checkDestructorReference(S.Context.getBaseElementType(AT), Loc, S))
return ExprError();
if (shouldBindAsTemporary(Entity))
CurInit = S.MaybeBindToTemporary(CurInit.get());
return CurInit;
}
namespace {
enum LifetimeKind {
/// The lifetime of a temporary bound to this entity ends at the end of the
/// full-expression, and that's (probably) fine.
LK_FullExpression,
/// The lifetime of a temporary bound to this entity is extended to the
/// lifetime of the entity itself.
LK_Extended,
/// The lifetime of a temporary bound to this entity probably ends too soon,
/// because the entity is allocated in a new-expression.
LK_New,
/// The lifetime of a temporary bound to this entity ends too soon, because
/// the entity is a return object.
LK_Return,
/// The lifetime of a temporary bound to this entity ends too soon, because
/// the entity is the result of a statement expression.
LK_StmtExprResult,
/// This is a mem-initializer: if it would extend a temporary (other than via
/// a default member initializer), the program is ill-formed.
LK_MemInitializer,
};
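// Illustrative examples for the kinds above (assumed typical cases, not an
// exhaustive list):
//   const int &r = 42;            // LK_Extended: lives as long as 'r'
//   void f(const int &); f(42);   // LK_FullExpression: dies after the call
//   struct A { const int &r; };
//   A *p = new A{42};             // LK_New: the temporary dies at the end of
//                                 // the full-expression; p->r dangles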
using LifetimeResult =
llvm::PointerIntPair<const InitializedEntity *, 3, LifetimeKind>;
}
/// Determine the declaration which an initialized entity ultimately refers to,
/// for the purpose of lifetime-extending a temporary bound to a reference in
/// the initialization of \p Entity.
static LifetimeResult getEntityLifetime(
const InitializedEntity *Entity,
const InitializedEntity *InitField = nullptr) {
// C++11 [class.temporary]p5:
switch (Entity->getKind()) {
case InitializedEntity::EK_Variable:
// The temporary [...] persists for the lifetime of the reference
return {Entity, LK_Extended};
case InitializedEntity::EK_Member:
// For subobjects, we look at the complete object.
if (Entity->getParent())
return getEntityLifetime(Entity->getParent(), Entity);
// except:
// C++17 [class.base.init]p8:
// A temporary expression bound to a reference member in a
// mem-initializer is ill-formed.
// C++17 [class.base.init]p11:
// A temporary expression bound to a reference member from a
// default member initializer is ill-formed.
//
// The context of p11 and its example suggest that it's only the use of a
// default member initializer from a constructor that makes the program
// ill-formed, not its mere existence, and that it can even be used by
// aggregate initialization.
return {Entity, Entity->isDefaultMemberInitializer() ? LK_Extended
: LK_MemInitializer};
case InitializedEntity::EK_Binding:
// Per [dcl.decomp]p3, the binding is treated as a variable of reference
// type.
return {Entity, LK_Extended};
case InitializedEntity::EK_Parameter:
case InitializedEntity::EK_Parameter_CF_Audited:
// -- A temporary bound to a reference parameter in a function call
// persists until the completion of the full-expression containing
// the call.
return {nullptr, LK_FullExpression};
case InitializedEntity::EK_TemplateParameter:
// FIXME: This will always be ill-formed; should we eagerly diagnose it here?
return {nullptr, LK_FullExpression};
case InitializedEntity::EK_Result:
// -- The lifetime of a temporary bound to the returned value in a
// function return statement is not extended; the temporary is
// destroyed at the end of the full-expression in the return statement.
return {nullptr, LK_Return};
case InitializedEntity::EK_StmtExprResult:
// FIXME: Should we lifetime-extend through the result of a statement
// expression?
return {nullptr, LK_StmtExprResult};
case InitializedEntity::EK_New:
// -- A temporary bound to a reference in a new-initializer persists
// until the completion of the full-expression containing the
// new-initializer.
return {nullptr, LK_New};
case InitializedEntity::EK_Temporary:
case InitializedEntity::EK_CompoundLiteralInit:
case InitializedEntity::EK_RelatedResult:
// We don't yet know the storage duration of the surrounding temporary.
// Assume it has full-expression duration for now; the storage duration will
// be patched up later if that's not correct.
return {nullptr, LK_FullExpression};
case InitializedEntity::EK_ArrayElement:
// For subobjects, we look at the complete object.
return getEntityLifetime(Entity->getParent(), InitField);
case InitializedEntity::EK_Base:
// For subobjects, we look at the complete object.
if (Entity->getParent())
return getEntityLifetime(Entity->getParent(), InitField);
return {InitField, LK_MemInitializer};
case InitializedEntity::EK_Delegating:
// We can reach this case for aggregate initialization in a constructor:
// struct A { int &&r; };
// struct B : A { B() : A{0} {} };
// In this case, use the outermost field decl as the context.
return {InitField, LK_MemInitializer};
case InitializedEntity::EK_BlockElement:
case InitializedEntity::EK_LambdaToBlockConversionBlockElement:
case InitializedEntity::EK_LambdaCapture:
case InitializedEntity::EK_VectorElement:
case InitializedEntity::EK_ComplexElement:
return {nullptr, LK_FullExpression};
case InitializedEntity::EK_Exception:
// FIXME: Can we diagnose lifetime problems with exceptions?
return {nullptr, LK_FullExpression};
+
+ case InitializedEntity::EK_ParenAggInitMember:
+ // -- A temporary object bound to a reference element of an aggregate of
+ // class type initialized from a parenthesized expression-list
+ // [dcl.init, 9.3] persists until the completion of the full-expression
+ // containing the expression-list.
+ return {nullptr, LK_FullExpression};
}
+
llvm_unreachable("unknown entity kind");
}
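// A sketch of the new EK_ParenAggInitMember case (C++20 parenthesized
// aggregate initialization; the example is assumed, not from this patch):
//   struct A { const int &r; };
//   A a(42);   // unlike 'A a{42};', no lifetime extension: a.r dangles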
namespace {
enum ReferenceKind {
/// Lifetime would be extended by a reference binding to a temporary.
RK_ReferenceBinding,
/// Lifetime would be extended by a std::initializer_list object binding to
/// its backing array.
RK_StdInitializerList,
};
/// A temporary or local variable. This will be one of:
/// * A MaterializeTemporaryExpr.
/// * A DeclRefExpr whose declaration is a local.
/// * An AddrLabelExpr.
/// * A BlockExpr for a block with captures.
using Local = Expr*;
/// Expressions we stepped over when looking for the local state. Any steps
/// that would inhibit lifetime extension or take us out of subexpressions of
/// the initializer are included.
struct IndirectLocalPathEntry {
enum EntryKind {
DefaultInit,
AddressOf,
VarInit,
LValToRVal,
LifetimeBoundCall,
TemporaryCopy,
LambdaCaptureInit,
GslReferenceInit,
GslPointerInit
} Kind;
Expr *E;
union {
const Decl *D = nullptr;
const LambdaCapture *Capture;
};
IndirectLocalPathEntry() {}
IndirectLocalPathEntry(EntryKind K, Expr *E) : Kind(K), E(E) {}
IndirectLocalPathEntry(EntryKind K, Expr *E, const Decl *D)
: Kind(K), E(E), D(D) {}
IndirectLocalPathEntry(EntryKind K, Expr *E, const LambdaCapture *Capture)
: Kind(K), E(E), Capture(Capture) {}
};
using IndirectLocalPath = llvm::SmallVectorImpl<IndirectLocalPathEntry>;
struct RevertToOldSizeRAII {
IndirectLocalPath &Path;
unsigned OldSize = Path.size();
RevertToOldSizeRAII(IndirectLocalPath &Path) : Path(Path) {}
~RevertToOldSizeRAII() { Path.resize(OldSize); }
};
using LocalVisitor = llvm::function_ref<bool(IndirectLocalPath &Path, Local L,
ReferenceKind RK)>;
}
static bool isVarOnPath(IndirectLocalPath &Path, VarDecl *VD) {
for (auto E : Path)
if (E.Kind == IndirectLocalPathEntry::VarInit && E.D == VD)
return true;
return false;
}
static bool pathContainsInit(IndirectLocalPath &Path) {
return llvm::any_of(Path, [=](IndirectLocalPathEntry E) {
return E.Kind == IndirectLocalPathEntry::DefaultInit ||
E.Kind == IndirectLocalPathEntry::VarInit;
});
}
static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
Expr *Init, LocalVisitor Visit,
bool RevisitSubinits,
bool EnableLifetimeWarnings);
static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
Expr *Init, ReferenceKind RK,
LocalVisitor Visit,
bool EnableLifetimeWarnings);
template <typename T> static bool isRecordWithAttr(QualType Type) {
if (auto *RD = Type->getAsCXXRecordDecl())
return RD->hasAttr<T>();
return false;
}
// Decl::isInStdNamespace will return false for iterators in some STL
// implementations due to them being defined in a namespace outside of the std
// namespace.
static bool isInStlNamespace(const Decl *D) {
const DeclContext *DC = D->getDeclContext();
if (!DC)
return false;
if (const auto *ND = dyn_cast<NamespaceDecl>(DC))
if (const IdentifierInfo *II = ND->getIdentifier()) {
StringRef Name = II->getName();
if (Name.size() >= 2 && Name.front() == '_' &&
(Name[1] == '_' || isUppercase(Name[1])))
return true;
}
return DC->isStdNamespace();
}
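// For illustration (typical implementation namespaces, assumed here):
// libstdc++ defines some iterators in __gnu_cxx and libc++ uses std::__1,
// so the check above also accepts reserved names that start with '_'
// followed by another '_' or an uppercase letter.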
static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee))
if (isRecordWithAttr<PointerAttr>(Conv->getConversionType()))
return true;
if (!isInStlNamespace(Callee->getParent()))
return false;
if (!isRecordWithAttr<PointerAttr>(Callee->getThisObjectType()) &&
!isRecordWithAttr<OwnerAttr>(Callee->getThisObjectType()))
return false;
if (Callee->getReturnType()->isPointerType() ||
isRecordWithAttr<PointerAttr>(Callee->getReturnType())) {
if (!Callee->getIdentifier())
return false;
return llvm::StringSwitch<bool>(Callee->getName())
.Cases("begin", "rbegin", "cbegin", "crbegin", true)
.Cases("end", "rend", "cend", "crend", true)
.Cases("c_str", "data", "get", true)
// Map and set types.
.Cases("find", "equal_range", "lower_bound", "upper_bound", true)
.Default(false);
} else if (Callee->getReturnType()->isReferenceType()) {
if (!Callee->getIdentifier()) {
auto OO = Callee->getOverloadedOperator();
return OO == OverloadedOperatorKind::OO_Subscript ||
OO == OverloadedOperatorKind::OO_Star;
}
return llvm::StringSwitch<bool>(Callee->getName())
.Cases("front", "back", "at", "top", "value", true)
.Default(false);
}
return false;
}
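// A canonical dangling case this predicate enables diagnosing (sketch,
// assuming std::string is annotated or inferred as a gsl::Owner):
//   const char *p = std::string("hi").c_str();
//   // 'p' dangles: the temporary owner dies at the end of the
//   // full-expression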
static bool shouldTrackFirstArgument(const FunctionDecl *FD) {
if (!FD->getIdentifier() || FD->getNumParams() != 1)
return false;
const auto *RD = FD->getParamDecl(0)->getType()->getPointeeCXXRecordDecl();
if (!FD->isInStdNamespace() || !RD || !RD->isInStdNamespace())
return false;
if (!isRecordWithAttr<PointerAttr>(QualType(RD->getTypeForDecl(), 0)) &&
!isRecordWithAttr<OwnerAttr>(QualType(RD->getTypeForDecl(), 0)))
return false;
if (FD->getReturnType()->isPointerType() ||
isRecordWithAttr<PointerAttr>(FD->getReturnType())) {
return llvm::StringSwitch<bool>(FD->getName())
.Cases("begin", "rbegin", "cbegin", "crbegin", true)
.Cases("end", "rend", "cend", "crend", true)
.Case("data", true)
.Default(false);
} else if (FD->getReturnType()->isReferenceType()) {
return llvm::StringSwitch<bool>(FD->getName())
.Cases("get", "any_cast", true)
.Default(false);
}
return false;
}
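// Sketch of a tracked free function (assuming the container's iterator type
// is annotated or inferred as gsl::Pointer):
//   auto it = std::begin(std::vector<int>{1, 2, 3});  // 'it' dangles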
static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call,
LocalVisitor Visit) {
auto VisitPointerArg = [&](const Decl *D, Expr *Arg, bool Value) {
// We are not interested in the temporary base objects of gsl Pointers:
// Temp().ptr; // Here ptr might not dangle.
if (isa<MemberExpr>(Arg->IgnoreImpCasts()))
return;
// Once we have initialized a value with a reference, it can no longer dangle.
if (!Value) {
for (const IndirectLocalPathEntry &PE : llvm::reverse(Path)) {
if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit)
continue;
if (PE.Kind == IndirectLocalPathEntry::GslPointerInit)
return;
break;
}
}
Path.push_back({Value ? IndirectLocalPathEntry::GslPointerInit
: IndirectLocalPathEntry::GslReferenceInit,
Arg, D});
if (Arg->isGLValue())
visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding,
Visit,
/*EnableLifetimeWarnings=*/true);
else
visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
/*EnableLifetimeWarnings=*/true);
Path.pop_back();
};
if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Call)) {
const auto *MD = cast_or_null<CXXMethodDecl>(MCE->getDirectCallee());
if (MD && shouldTrackImplicitObjectArg(MD))
VisitPointerArg(MD, MCE->getImplicitObjectArgument(),
!MD->getReturnType()->isReferenceType());
return;
} else if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(Call)) {
FunctionDecl *Callee = OCE->getDirectCallee();
if (Callee && Callee->isCXXInstanceMember() &&
shouldTrackImplicitObjectArg(cast<CXXMethodDecl>(Callee)))
VisitPointerArg(Callee, OCE->getArg(0),
!Callee->getReturnType()->isReferenceType());
return;
} else if (auto *CE = dyn_cast<CallExpr>(Call)) {
FunctionDecl *Callee = CE->getDirectCallee();
if (Callee && shouldTrackFirstArgument(Callee))
VisitPointerArg(Callee, CE->getArg(0),
!Callee->getReturnType()->isReferenceType());
return;
}
if (auto *CCE = dyn_cast<CXXConstructExpr>(Call)) {
const auto *Ctor = CCE->getConstructor();
const CXXRecordDecl *RD = Ctor->getParent();
if (CCE->getNumArgs() > 0 && RD->hasAttr<PointerAttr>())
VisitPointerArg(Ctor->getParamDecl(0), CCE->getArgs()[0], true);
}
}
static bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
const TypeSourceInfo *TSI = FD->getTypeSourceInfo();
if (!TSI)
return false;
// Don't declare this variable in the second operand of the for-statement;
// GCC miscompiles that by ending its lifetime before evaluating the
// third operand. See gcc.gnu.org/PR86769.
AttributedTypeLoc ATL;
for (TypeLoc TL = TSI->getTypeLoc();
(ATL = TL.getAsAdjusted<AttributedTypeLoc>());
TL = ATL.getModifiedLoc()) {
if (ATL.getAttrAs<LifetimeBoundAttr>())
return true;
}
// Assume that all assignment operators with a "normal" return type return
// *this, that is, an lvalue reference that is the same type as the implicit
// object parameter (or the LHS for a non-member operator$=).
OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator();
if (OO == OO_Equal || isCompoundAssignmentOperator(OO)) {
QualType RetT = FD->getReturnType();
if (RetT->isLValueReferenceType()) {
ASTContext &Ctx = FD->getASTContext();
QualType LHST;
auto *MD = dyn_cast<CXXMethodDecl>(FD);
if (MD && MD->isCXXInstanceMember())
LHST = Ctx.getLValueReferenceType(MD->getThisObjectType());
else
LHST = MD->getParamDecl(0)->getType();
if (Ctx.hasSameType(RetT, LHST))
return true;
}
}
return false;
}
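// Sketch of a lifetimebound implicit object parameter that this function
// detects (names assumed for illustration):
//   struct S {
//     const S &self() [[clang::lifetimebound]] { return *this; }
//   };
//   const S &r = S().self();  // the temporary dies; 'r' dangles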
static void visitLifetimeBoundArguments(IndirectLocalPath &Path, Expr *Call,
LocalVisitor Visit) {
const FunctionDecl *Callee;
ArrayRef<Expr*> Args;
if (auto *CE = dyn_cast<CallExpr>(Call)) {
Callee = CE->getDirectCallee();
Args = llvm::ArrayRef(CE->getArgs(), CE->getNumArgs());
} else {
auto *CCE = cast<CXXConstructExpr>(Call);
Callee = CCE->getConstructor();
Args = llvm::ArrayRef(CCE->getArgs(), CCE->getNumArgs());
}
if (!Callee)
return;
Expr *ObjectArg = nullptr;
if (isa<CXXOperatorCallExpr>(Call) && Callee->isCXXInstanceMember()) {
ObjectArg = Args[0];
Args = Args.slice(1);
} else if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Call)) {
ObjectArg = MCE->getImplicitObjectArgument();
}
auto VisitLifetimeBoundArg = [&](const Decl *D, Expr *Arg) {
Path.push_back({IndirectLocalPathEntry::LifetimeBoundCall, Arg, D});
if (Arg->isGLValue())
visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding,
Visit,
/*EnableLifetimeWarnings=*/false);
else
visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
/*EnableLifetimeWarnings=*/false);
Path.pop_back();
};
if (ObjectArg && implicitObjectParamIsLifetimeBound(Callee))
VisitLifetimeBoundArg(Callee, ObjectArg);
for (unsigned I = 0,
N = std::min<unsigned>(Callee->getNumParams(), Args.size());
I != N; ++I) {
if (Callee->getParamDecl(I)->hasAttr<LifetimeBoundAttr>())
VisitLifetimeBoundArg(Callee->getParamDecl(I), Args[I]);
}
}
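// Sketch of lifetimebound parameters that VisitLifetimeBoundArg follows
// (example assumed):
//   const int &smaller(const int &a [[clang::lifetimebound]],
//                      const int &b [[clang::lifetimebound]]);
//   const int &r = smaller(1, 2);  // both temporaries die at the end of
//                                  // the full-expression; 'r' dangles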
/// Visit the locals that would be reachable through a reference bound to the
/// glvalue expression \c Init.
static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
Expr *Init, ReferenceKind RK,
LocalVisitor Visit,
bool EnableLifetimeWarnings) {
RevertToOldSizeRAII RAII(Path);
// Walk past any constructs which we can lifetime-extend across.
Expr *Old;
do {
Old = Init;
if (auto *FE = dyn_cast<FullExpr>(Init))
Init = FE->getSubExpr();
if (InitListExpr *ILE = dyn_cast<InitListExpr>(Init)) {
// If this is just redundant braces around an initializer, step over it.
if (ILE->isTransparent())
Init = ILE->getInit(0);
}
// Step over any subobject adjustments; we may have a materialized
// temporary inside them.
Init = const_cast<Expr *>(Init->skipRValueSubobjectAdjustments());
// Per current approach for DR1376, look through casts to reference type
// when performing lifetime extension.
if (CastExpr *CE = dyn_cast<CastExpr>(Init))
if (CE->getSubExpr()->isGLValue())
Init = CE->getSubExpr();
// Per the current approach for DR1299, look through array element access
// on array glvalues when performing lifetime extension.
if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Init)) {
Init = ASE->getBase();
auto *ICE = dyn_cast<ImplicitCastExpr>(Init);
if (ICE && ICE->getCastKind() == CK_ArrayToPointerDecay)
Init = ICE->getSubExpr();
else
// We can't lifetime extend through this but we might still find some
// retained temporaries.
return visitLocalsRetainedByInitializer(Path, Init, Visit, true,
EnableLifetimeWarnings);
}
// Step into CXXDefaultInitExprs so we can diagnose cases where a
// constructor inherits one as an implicit mem-initializer.
if (auto *DIE = dyn_cast<CXXDefaultInitExpr>(Init)) {
Path.push_back(
{IndirectLocalPathEntry::DefaultInit, DIE, DIE->getField()});
Init = DIE->getExpr();
}
} while (Init != Old);
if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(Init)) {
if (Visit(Path, Local(MTE), RK))
visitLocalsRetainedByInitializer(Path, MTE->getSubExpr(), Visit, true,
EnableLifetimeWarnings);
}
if (isa<CallExpr>(Init)) {
if (EnableLifetimeWarnings)
handleGslAnnotatedTypes(Path, Init, Visit);
return visitLifetimeBoundArguments(Path, Init, Visit);
}
switch (Init->getStmtClass()) {
case Stmt::DeclRefExprClass: {
// If we find the name of a local non-reference parameter, we could have a
// lifetime problem.
auto *DRE = cast<DeclRefExpr>(Init);
auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
if (VD && VD->hasLocalStorage() &&
!DRE->refersToEnclosingVariableOrCapture()) {
if (!VD->getType()->isReferenceType()) {
Visit(Path, Local(DRE), RK);
} else if (isa<ParmVarDecl>(DRE->getDecl())) {
// The lifetime of a reference parameter is unknown; assume it's OK
// for now.
break;
} else if (VD->getInit() && !isVarOnPath(Path, VD)) {
Path.push_back({IndirectLocalPathEntry::VarInit, DRE, VD});
visitLocalsRetainedByReferenceBinding(Path, VD->getInit(),
RK_ReferenceBinding, Visit,
EnableLifetimeWarnings);
}
}
break;
}
case Stmt::UnaryOperatorClass: {
// The only unary operator that makes sense to handle here
// is Deref. All others don't resolve to a "name." This includes
// handling all sorts of rvalues passed to a unary operator.
const UnaryOperator *U = cast<UnaryOperator>(Init);
if (U->getOpcode() == UO_Deref)
visitLocalsRetainedByInitializer(Path, U->getSubExpr(), Visit, true,
EnableLifetimeWarnings);
break;
}
case Stmt::OMPArraySectionExprClass: {
visitLocalsRetainedByInitializer(Path,
cast<OMPArraySectionExpr>(Init)->getBase(),
Visit, true, EnableLifetimeWarnings);
break;
}
case Stmt::ConditionalOperatorClass:
case Stmt::BinaryConditionalOperatorClass: {
auto *C = cast<AbstractConditionalOperator>(Init);
if (!C->getTrueExpr()->getType()->isVoidType())
visitLocalsRetainedByReferenceBinding(Path, C->getTrueExpr(), RK, Visit,
EnableLifetimeWarnings);
if (!C->getFalseExpr()->getType()->isVoidType())
visitLocalsRetainedByReferenceBinding(Path, C->getFalseExpr(), RK, Visit,
EnableLifetimeWarnings);
break;
}
// FIXME: Visit the left-hand side of an -> or ->*.
default:
break;
}
}
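// Example of the DR1299 array-element walk above (a sketch):
//   using Arr = int[3];
//   const int &r = Arr{1, 2, 3}[0];  // binding extends the lifetime of the
//                                    // whole temporary array to that of 'r'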
/// Visit the locals that would be reachable through an object initialized by
/// the prvalue expression \c Init.
static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
Expr *Init, LocalVisitor Visit,
bool RevisitSubinits,
bool EnableLifetimeWarnings) {
RevertToOldSizeRAII RAII(Path);
Expr *Old;
do {
Old = Init;
// Step into CXXDefaultInitExprs so we can diagnose cases where a
// constructor inherits one as an implicit mem-initializer.
if (auto *DIE = dyn_cast<CXXDefaultInitExpr>(Init)) {
Path.push_back({IndirectLocalPathEntry::DefaultInit, DIE, DIE->getField()});
Init = DIE->getExpr();
}
if (auto *FE = dyn_cast<FullExpr>(Init))
Init = FE->getSubExpr();
// Dig out the expression which constructs the extended temporary.
Init = const_cast<Expr *>(Init->skipRValueSubobjectAdjustments());
if (CXXBindTemporaryExpr *BTE = dyn_cast<CXXBindTemporaryExpr>(Init))
Init = BTE->getSubExpr();
Init = Init->IgnoreParens();
// Step over value-preserving rvalue casts.
if (auto *CE = dyn_cast<CastExpr>(Init)) {
switch (CE->getCastKind()) {
case CK_LValueToRValue:
// If we can match the lvalue to a const object, we can look at its
// initializer.
Path.push_back({IndirectLocalPathEntry::LValToRVal, CE});
return visitLocalsRetainedByReferenceBinding(
Path, Init, RK_ReferenceBinding,
[&](IndirectLocalPath &Path, Local L, ReferenceKind RK) -> bool {
if (auto *DRE = dyn_cast<DeclRefExpr>(L)) {
auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
if (VD && VD->getType().isConstQualified() && VD->getInit() &&
!isVarOnPath(Path, VD)) {
Path.push_back({IndirectLocalPathEntry::VarInit, DRE, VD});
visitLocalsRetainedByInitializer(Path, VD->getInit(), Visit, true,
EnableLifetimeWarnings);
}
} else if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(L)) {
if (MTE->getType().isConstQualified())
visitLocalsRetainedByInitializer(Path, MTE->getSubExpr(), Visit,
true, EnableLifetimeWarnings);
}
return false;
}, EnableLifetimeWarnings);
// We assume that objects can be retained by pointers cast to integers,
// but not if the integer is cast to floating-point type or to _Complex.
// We assume that casts to 'bool' do not preserve enough information to
// retain a local object.
case CK_NoOp:
case CK_BitCast:
case CK_BaseToDerived:
case CK_DerivedToBase:
case CK_UncheckedDerivedToBase:
case CK_Dynamic:
case CK_ToUnion:
case CK_UserDefinedConversion:
case CK_ConstructorConversion:
case CK_IntegralToPointer:
case CK_PointerToIntegral:
case CK_VectorSplat:
case CK_IntegralCast:
case CK_CPointerToObjCPointerCast:
case CK_BlockPointerToObjCPointerCast:
case CK_AnyPointerToBlockPointerCast:
case CK_AddressSpaceConversion:
break;
case CK_ArrayToPointerDecay:
// Model array-to-pointer decay as taking the address of the array
// lvalue.
Path.push_back({IndirectLocalPathEntry::AddressOf, CE});
return visitLocalsRetainedByReferenceBinding(Path, CE->getSubExpr(),
RK_ReferenceBinding, Visit,
EnableLifetimeWarnings);
default:
return;
}
Init = CE->getSubExpr();
}
} while (Old != Init);
// C++17 [dcl.init.list]p6:
// initializing an initializer_list object from the array extends the
// lifetime of the array exactly like binding a reference to a temporary.
if (auto *ILE = dyn_cast<CXXStdInitializerListExpr>(Init))
return visitLocalsRetainedByReferenceBinding(Path, ILE->getSubExpr(),
RK_StdInitializerList, Visit,
EnableLifetimeWarnings);
if (InitListExpr *ILE = dyn_cast<InitListExpr>(Init)) {
// We already visited the elements of this initializer list while
// performing the initialization. Don't visit them again unless we've
// changed the lifetime of the initialized entity.
if (!RevisitSubinits)
return;
if (ILE->isTransparent())
return visitLocalsRetainedByInitializer(Path, ILE->getInit(0), Visit,
RevisitSubinits,
EnableLifetimeWarnings);
if (ILE->getType()->isArrayType()) {
for (unsigned I = 0, N = ILE->getNumInits(); I != N; ++I)
visitLocalsRetainedByInitializer(Path, ILE->getInit(I), Visit,
RevisitSubinits,
EnableLifetimeWarnings);
return;
}
if (CXXRecordDecl *RD = ILE->getType()->getAsCXXRecordDecl()) {
assert(RD->isAggregate() && "aggregate init on non-aggregate");
// If we lifetime-extend a braced initializer which is initializing an
// aggregate, and that aggregate contains reference members which are
// bound to temporaries, those temporaries are also lifetime-extended.
if (RD->isUnion() && ILE->getInitializedFieldInUnion() &&
ILE->getInitializedFieldInUnion()->getType()->isReferenceType())
visitLocalsRetainedByReferenceBinding(Path, ILE->getInit(0),
RK_ReferenceBinding, Visit,
EnableLifetimeWarnings);
else {
unsigned Index = 0;
for (; Index < RD->getNumBases() && Index < ILE->getNumInits(); ++Index)
visitLocalsRetainedByInitializer(Path, ILE->getInit(Index), Visit,
RevisitSubinits,
EnableLifetimeWarnings);
for (const auto *I : RD->fields()) {
if (Index >= ILE->getNumInits())
break;
if (I->isUnnamedBitfield())
continue;
Expr *SubInit = ILE->getInit(Index);
if (I->getType()->isReferenceType())
visitLocalsRetainedByReferenceBinding(Path, SubInit,
RK_ReferenceBinding, Visit,
EnableLifetimeWarnings);
else
// This might be either aggregate-initialization of a member or
// initialization of a std::initializer_list object. Regardless,
// we should recursively lifetime-extend that initializer.
visitLocalsRetainedByInitializer(Path, SubInit, Visit,
RevisitSubinits,
EnableLifetimeWarnings);
++Index;
}
}
}
return;
}
// The lifetime of an init-capture is that of the closure object constructed
// by a lambda-expression.
if (auto *LE = dyn_cast<LambdaExpr>(Init)) {
LambdaExpr::capture_iterator CapI = LE->capture_begin();
for (Expr *E : LE->capture_inits()) {
assert(CapI != LE->capture_end());
const LambdaCapture &Cap = *CapI++;
if (!E)
continue;
if (Cap.capturesVariable())
Path.push_back({IndirectLocalPathEntry::LambdaCaptureInit, E, &Cap});
if (E->isGLValue())
visitLocalsRetainedByReferenceBinding(Path, E, RK_ReferenceBinding,
Visit, EnableLifetimeWarnings);
else
visitLocalsRetainedByInitializer(Path, E, Visit, true,
EnableLifetimeWarnings);
if (Cap.capturesVariable())
Path.pop_back();
}
}
// Assume that a copy or move from a temporary references the same objects
// that the temporary does.
if (auto *CCE = dyn_cast<CXXConstructExpr>(Init)) {
if (CCE->getConstructor()->isCopyOrMoveConstructor()) {
if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(CCE->getArg(0))) {
Expr *Arg = MTE->getSubExpr();
Path.push_back({IndirectLocalPathEntry::TemporaryCopy, Arg,
CCE->getConstructor()});
visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
/*EnableLifetimeWarnings*/false);
Path.pop_back();
}
}
}
if (isa<CallExpr>(Init) || isa<CXXConstructExpr>(Init)) {
if (EnableLifetimeWarnings)
handleGslAnnotatedTypes(Path, Init, Visit);
return visitLifetimeBoundArguments(Path, Init, Visit);
}
switch (Init->getStmtClass()) {
case Stmt::UnaryOperatorClass: {
auto *UO = cast<UnaryOperator>(Init);
// If the initializer is the address of a local, we could have a lifetime
// problem.
if (UO->getOpcode() == UO_AddrOf) {
// If this is &rvalue, then it's ill-formed and we have already diagnosed
// it. Don't produce a redundant warning about the lifetime of the
// temporary.
if (isa<MaterializeTemporaryExpr>(UO->getSubExpr()))
return;
Path.push_back({IndirectLocalPathEntry::AddressOf, UO});
visitLocalsRetainedByReferenceBinding(Path, UO->getSubExpr(),
RK_ReferenceBinding, Visit,
EnableLifetimeWarnings);
}
break;
}
case Stmt::BinaryOperatorClass: {
// Handle pointer arithmetic.
auto *BO = cast<BinaryOperator>(Init);
BinaryOperatorKind BOK = BO->getOpcode();
if (!BO->getType()->isPointerType() || (BOK != BO_Add && BOK != BO_Sub))
break;
if (BO->getLHS()->getType()->isPointerType())
visitLocalsRetainedByInitializer(Path, BO->getLHS(), Visit, true,
EnableLifetimeWarnings);
else if (BO->getRHS()->getType()->isPointerType())
visitLocalsRetainedByInitializer(Path, BO->getRHS(), Visit, true,
EnableLifetimeWarnings);
break;
}
case Stmt::ConditionalOperatorClass:
case Stmt::BinaryConditionalOperatorClass: {
auto *C = cast<AbstractConditionalOperator>(Init);
// In C++, we can have a throw-expression operand, which has 'void' type
// and isn't interesting from a lifetime perspective.
if (!C->getTrueExpr()->getType()->isVoidType())
visitLocalsRetainedByInitializer(Path, C->getTrueExpr(), Visit, true,
EnableLifetimeWarnings);
if (!C->getFalseExpr()->getType()->isVoidType())
visitLocalsRetainedByInitializer(Path, C->getFalseExpr(), Visit, true,
EnableLifetimeWarnings);
break;
}
case Stmt::BlockExprClass:
if (cast<BlockExpr>(Init)->getBlockDecl()->hasCaptures()) {
// This is a local block, whose lifetime is that of the function.
Visit(Path, Local(cast<BlockExpr>(Init)), RK_ReferenceBinding);
}
break;
case Stmt::AddrLabelExprClass:
// We want to warn if the address of a label would escape the function.
Visit(Path, Local(cast<AddrLabelExpr>(Init)), RK_ReferenceBinding);
break;
default:
break;
}
}
/// Whether a path to an object supports lifetime extension.
enum PathLifetimeKind {
/// Lifetime-extend along this path.
Extend,
/// We should lifetime-extend, but we don't because (due to technical
/// limitations) we can't. This happens for default member initializers,
/// which we don't clone for every use, so we don't have a unique
/// MaterializeTemporaryExpr to update.
ShouldExtend,
/// Do not lifetime extend along this path.
NoExtend
};
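// Sketch of the ShouldExtend limitation (example assumed):
//   struct X { const int &r = 42; };  // default member initializer
//   X x{};  // per DR1815 the temporary should be extended to the lifetime
//           // of 'x', but we emit warn_unsupported_lifetime_extension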
/// Determine whether this is an indirect path to a temporary that we are
/// supposed to lifetime-extend along.
static PathLifetimeKind
shouldLifetimeExtendThroughPath(const IndirectLocalPath &Path) {
PathLifetimeKind Kind = PathLifetimeKind::Extend;
for (auto Elem : Path) {
if (Elem.Kind == IndirectLocalPathEntry::DefaultInit)
Kind = PathLifetimeKind::ShouldExtend;
else if (Elem.Kind != IndirectLocalPathEntry::LambdaCaptureInit)
return PathLifetimeKind::NoExtend;
}
return Kind;
}
/// Find the range for the first interesting entry in the path at or after I.
static SourceRange nextPathEntryRange(const IndirectLocalPath &Path, unsigned I,
Expr *E) {
for (unsigned N = Path.size(); I != N; ++I) {
switch (Path[I].Kind) {
case IndirectLocalPathEntry::AddressOf:
case IndirectLocalPathEntry::LValToRVal:
case IndirectLocalPathEntry::LifetimeBoundCall:
case IndirectLocalPathEntry::TemporaryCopy:
case IndirectLocalPathEntry::GslReferenceInit:
case IndirectLocalPathEntry::GslPointerInit:
// These exist primarily to mark the path as not permitting or
// supporting lifetime extension.
break;
case IndirectLocalPathEntry::VarInit:
if (cast<VarDecl>(Path[I].D)->isImplicit())
return SourceRange();
[[fallthrough]];
case IndirectLocalPathEntry::DefaultInit:
return Path[I].E->getSourceRange();
case IndirectLocalPathEntry::LambdaCaptureInit:
if (!Path[I].Capture->capturesVariable())
continue;
return Path[I].E->getSourceRange();
}
}
return E->getSourceRange();
}
static bool pathOnlyInitializesGslPointer(IndirectLocalPath &Path) {
for (const auto &It : llvm::reverse(Path)) {
if (It.Kind == IndirectLocalPathEntry::VarInit)
continue;
if (It.Kind == IndirectLocalPathEntry::AddressOf)
continue;
if (It.Kind == IndirectLocalPathEntry::LifetimeBoundCall)
continue;
return It.Kind == IndirectLocalPathEntry::GslPointerInit ||
It.Kind == IndirectLocalPathEntry::GslReferenceInit;
}
return false;
}
void Sema::checkInitializerLifetime(const InitializedEntity &Entity,
Expr *Init) {
LifetimeResult LR = getEntityLifetime(&Entity);
LifetimeKind LK = LR.getInt();
const InitializedEntity *ExtendingEntity = LR.getPointer();
// If this entity doesn't have an interesting lifetime, don't bother looking
// for temporaries within its initializer.
if (LK == LK_FullExpression)
return;
auto TemporaryVisitor = [&](IndirectLocalPath &Path, Local L,
ReferenceKind RK) -> bool {
SourceRange DiagRange = nextPathEntryRange(Path, 0, L);
SourceLocation DiagLoc = DiagRange.getBegin();
auto *MTE = dyn_cast<MaterializeTemporaryExpr>(L);
bool IsGslPtrInitWithGslTempOwner = false;
bool IsLocalGslOwner = false;
if (pathOnlyInitializesGslPointer(Path)) {
if (isa<DeclRefExpr>(L)) {
// We do not want to follow the references when returning a pointer originating
// from a local owner to avoid the following false positive:
// int &p = *localUniquePtr;
// someContainer.add(std::move(localUniquePtr));
// return p;
IsLocalGslOwner = isRecordWithAttr<OwnerAttr>(L->getType());
if (pathContainsInit(Path) || !IsLocalGslOwner)
return false;
} else {
IsGslPtrInitWithGslTempOwner = MTE && !MTE->getExtendingDecl() &&
isRecordWithAttr<OwnerAttr>(MTE->getType());
// Skipping a chain of initializing gsl::Pointer annotated objects.
// We are looking only for the final source to find out if it was
// a local or temporary owner or the address of a local variable/param.
if (!IsGslPtrInitWithGslTempOwner)
return true;
}
}
switch (LK) {
case LK_FullExpression:
llvm_unreachable("already handled this");
case LK_Extended: {
if (!MTE) {
// The initialized entity has lifetime beyond the full-expression,
// and the local entity does too, so don't warn.
//
// FIXME: We should consider warning if a static / thread storage
// duration variable retains an automatic storage duration local.
return false;
}
if (IsGslPtrInitWithGslTempOwner && DiagLoc.isValid()) {
Diag(DiagLoc, diag::warn_dangling_lifetime_pointer) << DiagRange;
return false;
}
switch (shouldLifetimeExtendThroughPath(Path)) {
case PathLifetimeKind::Extend:
// Update the storage duration of the materialized temporary.
// FIXME: Rebuild the expression instead of mutating it.
MTE->setExtendingDecl(ExtendingEntity->getDecl(),
ExtendingEntity->allocateManglingNumber());
// Also visit the temporaries lifetime-extended by this initializer.
return true;
case PathLifetimeKind::ShouldExtend:
// We're supposed to lifetime-extend the temporary along this path (per
// the resolution of DR1815), but we don't support that yet.
//
// FIXME: Properly handle this situation. Perhaps the easiest approach
// would be to clone the initializer expression on each use that would
// lifetime extend its temporaries.
Diag(DiagLoc, diag::warn_unsupported_lifetime_extension)
<< RK << DiagRange;
break;
case PathLifetimeKind::NoExtend:
// If the path goes through the initialization of a variable or field,
// it can't possibly reach a temporary created in this full-expression.
// We will have already diagnosed any problems with the initializer.
if (pathContainsInit(Path))
return false;
Diag(DiagLoc, diag::warn_dangling_variable)
<< RK << !Entity.getParent()
<< ExtendingEntity->getDecl()->isImplicit()
<< ExtendingEntity->getDecl() << Init->isGLValue() << DiagRange;
break;
}
break;
}
case LK_MemInitializer: {
if (isa<MaterializeTemporaryExpr>(L)) {
// Under C++ DR1696, if a mem-initializer (or a default member
// initializer used in the absence of one) would lifetime-extend a
// temporary, the program is ill-formed.
if (auto *ExtendingDecl =
ExtendingEntity ? ExtendingEntity->getDecl() : nullptr) {
if (IsGslPtrInitWithGslTempOwner) {
Diag(DiagLoc, diag::warn_dangling_lifetime_pointer_member)
<< ExtendingDecl << DiagRange;
Diag(ExtendingDecl->getLocation(),
diag::note_ref_or_ptr_member_declared_here)
<< true;
return false;
}
bool IsSubobjectMember = ExtendingEntity != &Entity;
Diag(DiagLoc, shouldLifetimeExtendThroughPath(Path) !=
PathLifetimeKind::NoExtend
? diag::err_dangling_member
: diag::warn_dangling_member)
<< ExtendingDecl << IsSubobjectMember << RK << DiagRange;
// Don't bother adding a note pointing to the field if we're inside
// its default member initializer; our primary diagnostic points to
// the same place in that case.
if (Path.empty() ||
Path.back().Kind != IndirectLocalPathEntry::DefaultInit) {
Diag(ExtendingDecl->getLocation(),
diag::note_lifetime_extending_member_declared_here)
<< RK << IsSubobjectMember;
}
} else {
// We have a mem-initializer but no particular field within it; this
// is either a base class or a delegating initializer directly
// initializing the base-class from something that doesn't live long
// enough.
//
// FIXME: Warn on this.
return false;
}
} else {
// Paths via a default initializer can only occur during error recovery
// (there's no other way that a default initializer can refer to a
// local). Don't produce a bogus warning on those cases.
if (pathContainsInit(Path))
return false;
// Suppress false positives for code like the one below:
// Ctor(unique_ptr<T> up) : member(*up), member2(move(up)) {}
if (IsLocalGslOwner && pathOnlyInitializesGslPointer(Path))
return false;
auto *DRE = dyn_cast<DeclRefExpr>(L);
auto *VD = DRE ? dyn_cast<VarDecl>(DRE->getDecl()) : nullptr;
if (!VD) {
// A member was initialized to a local block.
// FIXME: Warn on this.
return false;
}
if (auto *Member =
ExtendingEntity ? ExtendingEntity->getDecl() : nullptr) {
bool IsPointer = !Member->getType()->isReferenceType();
Diag(DiagLoc, IsPointer ? diag::warn_init_ptr_member_to_parameter_addr
: diag::warn_bind_ref_member_to_parameter)
<< Member << VD << isa<ParmVarDecl>(VD) << DiagRange;
Diag(Member->getLocation(),
diag::note_ref_or_ptr_member_declared_here)
<< (unsigned)IsPointer;
}
}
break;
}
case LK_New:
if (isa<MaterializeTemporaryExpr>(L)) {
if (IsGslPtrInitWithGslTempOwner)
Diag(DiagLoc, diag::warn_dangling_lifetime_pointer) << DiagRange;
else
Diag(DiagLoc, RK == RK_ReferenceBinding
? diag::warn_new_dangling_reference
: diag::warn_new_dangling_initializer_list)
<< !Entity.getParent() << DiagRange;
} else {
// We can't determine if the allocation outlives the local declaration.
return false;
}
break;
case LK_Return:
case LK_StmtExprResult:
if (auto *DRE = dyn_cast<DeclRefExpr>(L)) {
// We can't determine if the local variable outlives the statement
// expression.
if (LK == LK_StmtExprResult)
return false;
Diag(DiagLoc, diag::warn_ret_stack_addr_ref)
<< Entity.getType()->isReferenceType() << DRE->getDecl()
<< isa<ParmVarDecl>(DRE->getDecl()) << DiagRange;
} else if (isa<BlockExpr>(L)) {
Diag(DiagLoc, diag::err_ret_local_block) << DiagRange;
} else if (isa<AddrLabelExpr>(L)) {
// Don't warn when returning a label from a statement expression.
// Leaving the scope doesn't end its lifetime.
if (LK == LK_StmtExprResult)
return false;
Diag(DiagLoc, diag::warn_ret_addr_label) << DiagRange;
} else {
Diag(DiagLoc, diag::warn_ret_local_temp_addr_ref)
<< Entity.getType()->isReferenceType() << DiagRange;
}
break;
}
for (unsigned I = 0; I != Path.size(); ++I) {
auto Elem = Path[I];
switch (Elem.Kind) {
case IndirectLocalPathEntry::AddressOf:
case IndirectLocalPathEntry::LValToRVal:
// These exist primarily to mark the path as not permitting or
// supporting lifetime extension.
break;
case IndirectLocalPathEntry::LifetimeBoundCall:
case IndirectLocalPathEntry::TemporaryCopy:
case IndirectLocalPathEntry::GslPointerInit:
case IndirectLocalPathEntry::GslReferenceInit:
// FIXME: Consider adding a note for these.
break;
case IndirectLocalPathEntry::DefaultInit: {
auto *FD = cast<FieldDecl>(Elem.D);
Diag(FD->getLocation(), diag::note_init_with_default_member_initalizer)
<< FD << nextPathEntryRange(Path, I + 1, L);
break;
}
case IndirectLocalPathEntry::VarInit: {
const VarDecl *VD = cast<VarDecl>(Elem.D);
Diag(VD->getLocation(), diag::note_local_var_initializer)
<< VD->getType()->isReferenceType()
<< VD->isImplicit() << VD->getDeclName()
<< nextPathEntryRange(Path, I + 1, L);
break;
}
case IndirectLocalPathEntry::LambdaCaptureInit:
if (!Elem.Capture->capturesVariable())
break;
// FIXME: We can't easily tell apart an init-capture from a nested
// capture of an init-capture.
const ValueDecl *VD = Elem.Capture->getCapturedVar();
Diag(Elem.Capture->getLocation(), diag::note_lambda_capture_initializer)
<< VD << VD->isInitCapture() << Elem.Capture->isExplicit()
<< (Elem.Capture->getCaptureKind() == LCK_ByRef) << VD
<< nextPathEntryRange(Path, I + 1, L);
break;
}
}
// We didn't lifetime-extend, so don't go any further; we don't need more
// warnings or errors on inner temporaries within this one's initializer.
return false;
};
bool EnableLifetimeWarnings = !getDiagnostics().isIgnored(
diag::warn_dangling_lifetime_pointer, SourceLocation());
llvm::SmallVector<IndirectLocalPathEntry, 8> Path;
if (Init->isGLValue())
visitLocalsRetainedByReferenceBinding(Path, Init, RK_ReferenceBinding,
TemporaryVisitor,
EnableLifetimeWarnings);
else
visitLocalsRetainedByInitializer(Path, Init, TemporaryVisitor, false,
EnableLifetimeWarnings);
}
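// Minimal examples of what this entry point diagnoses (sketches):
//   const int &f() { return 42; }  // LK_Return: warn_ret_local_temp_addr_ref
//   const int &g = 42;             // LK_Extended: the temporary is
//                                  // lifetime-extended, no warning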
static void DiagnoseNarrowingInInitList(Sema &S,
const ImplicitConversionSequence &ICS,
QualType PreNarrowingType,
QualType EntityType,
const Expr *PostInit);
/// Provide warnings when std::move is used on construction.
static void CheckMoveOnConstruction(Sema &S, const Expr *InitExpr,
bool IsReturnStmt) {
if (!InitExpr)
return;
if (S.inTemplateInstantiation())
return;
QualType DestType = InitExpr->getType();
if (!DestType->isRecordType())
return;
unsigned DiagID = 0;
if (IsReturnStmt) {
const CXXConstructExpr *CCE =
dyn_cast<CXXConstructExpr>(InitExpr->IgnoreParens());
if (!CCE || CCE->getNumArgs() != 1)
return;
if (!CCE->getConstructor()->isCopyOrMoveConstructor())
return;
InitExpr = CCE->getArg(0)->IgnoreImpCasts();
}
// Find the std::move call and get the argument.
const CallExpr *CE = dyn_cast<CallExpr>(InitExpr->IgnoreParens());
if (!CE || !CE->isCallToStdMove())
return;
const Expr *Arg = CE->getArg(0)->IgnoreImplicit();
if (IsReturnStmt) {
const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Arg->IgnoreParenImpCasts());
if (!DRE || DRE->refersToEnclosingVariableOrCapture())
return;
const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl());
if (!VD || !VD->hasLocalStorage())
return;
// __block variables are not moved implicitly.
if (VD->hasAttr<BlocksAttr>())
return;
QualType SourceType = VD->getType();
if (!SourceType->isRecordType())
return;
if (!S.Context.hasSameUnqualifiedType(DestType, SourceType)) {
return;
}
// If we're returning a function parameter, copy elision
// is not possible.
if (isa<ParmVarDecl>(VD))
DiagID = diag::warn_redundant_move_on_return;
else
DiagID = diag::warn_pessimizing_move_on_return;
} else {
DiagID = diag::warn_pessimizing_move_on_initialization;
const Expr *ArgStripped = Arg->IgnoreImplicit()->IgnoreParens();
if (!ArgStripped->isPRValue() || !ArgStripped->getType()->isRecordType())
return;
}
S.Diag(CE->getBeginLoc(), DiagID);
// Get all the locations for a fix-it. Don't emit the fix-it if any location
// is within a macro.
SourceLocation CallBegin = CE->getCallee()->getBeginLoc();
if (CallBegin.isMacroID())
return;
SourceLocation RParen = CE->getRParenLoc();
if (RParen.isMacroID())
return;
SourceLocation LParen;
SourceLocation ArgLoc = Arg->getBeginLoc();
// Special testing for the argument location. Since the fix-it needs the
// location right before the argument, the argument location can be in a
// macro only if it is at the beginning of the macro.
while (ArgLoc.isMacroID() &&
S.getSourceManager().isAtStartOfImmediateMacroExpansion(ArgLoc)) {
ArgLoc = S.getSourceManager().getImmediateExpansionRange(ArgLoc).getBegin();
}
  // Bail out if, after unwinding, the argument location is still inside a
  // macro.
  if (ArgLoc.isMacroID())
    return;
  LParen = ArgLoc.getLocWithOffset(-1);
S.Diag(CE->getBeginLoc(), diag::note_remove_move)
<< FixItHint::CreateRemoval(SourceRange(CallBegin, LParen))
<< FixItHint::CreateRemoval(SourceRange(RParen, RParen));
}
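// Example of the diagnostic and fix-it above (sketch; 'Widget' is a
// placeholder type):
//   Widget make() {
//     Widget w;
//     return std::move(w);  // warn_pessimizing_move_on_return; the fix-it
//   }                       // removes "std::move(" and ")"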
static void CheckForNullPointerDereference(Sema &S, const Expr *E) {
// Check to see if we are dereferencing a null pointer. If so, this is
// undefined behavior, so warn about it. This only handles the pattern
// "*null", which is a very syntactic check.
if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E->IgnoreParenCasts()))
if (UO->getOpcode() == UO_Deref &&
UO->getSubExpr()->IgnoreParenCasts()->
isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull)) {
S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO,
S.PDiag(diag::warn_binding_null_to_reference)
<< UO->getSubExpr()->getSourceRange());
}
}
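// Example of the "*null" pattern this catches (sketch):
//   int &r = *(int *)nullptr;  // warn_binding_null_to_reference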
MaterializeTemporaryExpr *
Sema::CreateMaterializeTemporaryExpr(QualType T, Expr *Temporary,
bool BoundToLvalueReference) {
auto MTE = new (Context)
MaterializeTemporaryExpr(T, Temporary, BoundToLvalueReference);
// Order an ExprWithCleanups for lifetime marks.
//
// TODO: It'll be good to have a single place to check the access of the
// destructor and generate ExprWithCleanups for various uses. Currently these
// are done in both CreateMaterializeTemporaryExpr and MaybeBindToTemporary,
// but there may be a chance to merge them.
Cleanup.setExprNeedsCleanups(false);
return MTE;
}
ExprResult Sema::TemporaryMaterializationConversion(Expr *E) {
// In C++98, we don't want to implicitly create an xvalue.
// FIXME: This means that AST consumers need to deal with "prvalues" that
// denote materialized temporaries. Maybe we should add another ValueKind
// for "xvalue pretending to be a prvalue" for C++98 support.
if (!E->isPRValue() || !getLangOpts().CPlusPlus11)
return E;
// C++1z [conv.rval]/1: T shall be a complete type.
// FIXME: Does this ever matter (can we form a prvalue of incomplete type)?
// If so, we should check for a non-abstract class type here too.
QualType T = E->getType();
if (RequireCompleteType(E->getExprLoc(), T, diag::err_incomplete_type))
return ExprError();
return CreateMaterializeTemporaryExpr(E->getType(), E, false);
}
ExprResult Sema::PerformQualificationConversion(Expr *E, QualType Ty,
ExprValueKind VK,
CheckedConversionKind CCK) {
CastKind CK = CK_NoOp;
if (VK == VK_PRValue) {
auto PointeeTy = Ty->getPointeeType();
auto ExprPointeeTy = E->getType()->getPointeeType();
if (!PointeeTy.isNull() &&
PointeeTy.getAddressSpace() != ExprPointeeTy.getAddressSpace())
CK = CK_AddressSpaceConversion;
} else if (Ty.getAddressSpace() != E->getType().getAddressSpace()) {
CK = CK_AddressSpaceConversion;
}
return ImpCastExprToType(E, Ty, CK, VK, /*BasePath=*/nullptr, CCK);
}
ExprResult InitializationSequence::Perform(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
MultiExprArg Args,
QualType *ResultType) {
if (Failed()) {
Diagnose(S, Entity, Kind, Args);
return ExprError();
}
if (!ZeroInitializationFixit.empty()) {
const Decl *D = Entity.getDecl();
const auto *VD = dyn_cast_or_null<VarDecl>(D);
QualType DestType = Entity.getType();
// The initialization would have succeeded with this fixit. Since the fixit
// is on the error, we need to build a valid AST in this case, so this isn't
// handled in the Failed() branch above.
if (!DestType->isRecordType() && VD && VD->isConstexpr()) {
// Use a more useful diagnostic for constexpr variables.
S.Diag(Kind.getLocation(), diag::err_constexpr_var_requires_const_init)
<< VD
<< FixItHint::CreateInsertion(ZeroInitializationFixitLoc,
ZeroInitializationFixit);
} else {
unsigned DiagID = diag::err_default_init_const;
if (S.getLangOpts().MSVCCompat && D && D->hasAttr<SelectAnyAttr>())
DiagID = diag::ext_default_init_const;
S.Diag(Kind.getLocation(), DiagID)
<< DestType << (bool)DestType->getAs<RecordType>()
<< FixItHint::CreateInsertion(ZeroInitializationFixitLoc,
ZeroInitializationFixit);
}
}
if (getKind() == DependentSequence) {
// If the declaration is a non-dependent, incomplete array type
// that has an initializer, then its type will be completed once
// the initializer is instantiated.
if (ResultType && !Entity.getType()->isDependentType() &&
Args.size() == 1) {
QualType DeclType = Entity.getType();
if (const IncompleteArrayType *ArrayT
= S.Context.getAsIncompleteArrayType(DeclType)) {
// FIXME: We don't currently have the ability to accurately
// compute the length of an initializer list without
// performing full type-checking of the initializer list
// (since we have to determine where braces are implicitly
// introduced and such). So, we fall back to making the array
// type a dependently-sized array type with no specified
// bound.
if (isa<InitListExpr>((Expr *)Args[0])) {
SourceRange Brackets;
// Scavenge the location of the brackets from the entity, if we can.
if (auto *DD = dyn_cast_or_null<DeclaratorDecl>(Entity.getDecl())) {
if (TypeSourceInfo *TInfo = DD->getTypeSourceInfo()) {
TypeLoc TL = TInfo->getTypeLoc();
if (IncompleteArrayTypeLoc ArrayLoc =
TL.getAs<IncompleteArrayTypeLoc>())
Brackets = ArrayLoc.getBracketsRange();
}
}
*ResultType
= S.Context.getDependentSizedArrayType(ArrayT->getElementType(),
/*NumElts=*/nullptr,
ArrayT->getSizeModifier(),
ArrayT->getIndexTypeCVRQualifiers(),
Brackets);
}
}
}
if (Kind.getKind() == InitializationKind::IK_Direct &&
!Kind.isExplicitCast()) {
// Rebuild the ParenListExpr.
SourceRange ParenRange = Kind.getParenOrBraceRange();
return S.ActOnParenListExpr(ParenRange.getBegin(), ParenRange.getEnd(),
Args);
}
assert(Kind.getKind() == InitializationKind::IK_Copy ||
Kind.isExplicitCast() ||
Kind.getKind() == InitializationKind::IK_DirectList);
return ExprResult(Args[0]);
}
// No steps means no initialization.
if (Steps.empty())
return ExprResult((Expr *)nullptr);
if (S.getLangOpts().CPlusPlus11 && Entity.getType()->isReferenceType() &&
Args.size() == 1 && isa<InitListExpr>(Args[0]) &&
!Entity.isParamOrTemplateParamKind()) {
// Produce a C++98 compatibility warning if we are initializing a reference
// from an initializer list. For parameters, we produce a better warning
// elsewhere.
Expr *Init = Args[0];
S.Diag(Init->getBeginLoc(), diag::warn_cxx98_compat_reference_list_init)
<< Init->getSourceRange();
}
// OpenCL v2.0 s6.13.11.1: atomic variables can be initialized in global scope.
QualType ETy = Entity.getType();
bool HasGlobalAS = ETy.hasAddressSpace() &&
ETy.getAddressSpace() == LangAS::opencl_global;
if (S.getLangOpts().OpenCLVersion >= 200 &&
ETy->isAtomicType() && !HasGlobalAS &&
Entity.getKind() == InitializedEntity::EK_Variable && Args.size() > 0) {
S.Diag(Args[0]->getBeginLoc(), diag::err_opencl_atomic_init)
<< 1
<< SourceRange(Entity.getDecl()->getBeginLoc(), Args[0]->getEndLoc());
return ExprError();
}
QualType DestType = Entity.getType().getNonReferenceType();
// FIXME: Ugly hack around the fact that Entity.getType() is not
// the same as Entity.getDecl()->getType() in cases involving type merging,
// and we want the latter when it makes sense.
if (ResultType)
*ResultType = Entity.getDecl() ? Entity.getDecl()->getType() :
Entity.getType();
ExprResult CurInit((Expr *)nullptr);
SmallVector<Expr*, 4> ArrayLoopCommonExprs;
// HLSL allows vector initialization to function like list initialization, but
// uses the syntax of a C++-like constructor.
bool IsHLSLVectorInit = S.getLangOpts().HLSL && DestType->isExtVectorType() &&
isa<InitListExpr>(Args[0]);
(void)IsHLSLVectorInit;
// For initialization steps that start with a single initializer,
// grab the only argument out of Args and place it into the "current"
// initializer.
switch (Steps.front().Kind) {
case SK_ResolveAddressOfOverloadedFunction:
case SK_CastDerivedToBasePRValue:
case SK_CastDerivedToBaseXValue:
case SK_CastDerivedToBaseLValue:
case SK_BindReference:
case SK_BindReferenceToTemporary:
case SK_FinalCopy:
case SK_ExtraneousCopyToTemporary:
case SK_UserConversion:
case SK_QualificationConversionLValue:
case SK_QualificationConversionXValue:
case SK_QualificationConversionPRValue:
case SK_FunctionReferenceConversion:
case SK_AtomicConversion:
case SK_ConversionSequence:
case SK_ConversionSequenceNoNarrowing:
case SK_ListInitialization:
case SK_UnwrapInitList:
case SK_RewrapInitList:
case SK_CAssignment:
case SK_StringInit:
case SK_ObjCObjectConversion:
case SK_ArrayLoopIndex:
case SK_ArrayLoopInit:
case SK_ArrayInit:
case SK_GNUArrayInit:
case SK_ParenthesizedArrayInit:
case SK_PassByIndirectCopyRestore:
case SK_PassByIndirectRestore:
case SK_ProduceObjCObject:
case SK_StdInitializerList:
case SK_OCLSamplerInit:
case SK_OCLZeroOpaqueType: {
assert(Args.size() == 1 || IsHLSLVectorInit);
CurInit = Args[0];
if (!CurInit.get()) return ExprError();
break;
}
case SK_ConstructorInitialization:
case SK_ConstructorInitializationFromList:
case SK_StdInitializerListConstructorCall:
case SK_ZeroInitialization:
case SK_ParenthesizedListInit:
break;
}
// Promote from an unevaluated context to an unevaluated list context in
// C++11 list-initialization; we need to instantiate entities usable in
// constant expressions here in order to perform narrowing checks =(
EnterExpressionEvaluationContext Evaluated(
S, EnterExpressionEvaluationContext::InitList,
CurInit.get() && isa<InitListExpr>(CurInit.get()));
// C++ [class.abstract]p2:
// no objects of an abstract class can be created except as subobjects
// of a class derived from it
auto checkAbstractType = [&](QualType T) -> bool {
if (Entity.getKind() == InitializedEntity::EK_Base ||
Entity.getKind() == InitializedEntity::EK_Delegating)
return false;
return S.RequireNonAbstractType(Kind.getLocation(), T,
diag::err_allocation_of_abstract_type);
};
// Walk through the computed steps for the initialization sequence,
// performing the specified conversions along the way.
bool ConstructorInitRequiresZeroInit = false;
for (step_iterator Step = step_begin(), StepEnd = step_end();
Step != StepEnd; ++Step) {
if (CurInit.isInvalid())
return ExprError();
QualType SourceType = CurInit.get() ? CurInit.get()->getType() : QualType();
switch (Step->Kind) {
case SK_ResolveAddressOfOverloadedFunction:
// Overload resolution determined which function to invoke; update the
// initializer to reflect that choice.
S.CheckAddressOfMemberAccess(CurInit.get(), Step->Function.FoundDecl);
if (S.DiagnoseUseOfDecl(Step->Function.FoundDecl, Kind.getLocation()))
return ExprError();
CurInit = S.FixOverloadedFunctionReference(CurInit,
Step->Function.FoundDecl,
Step->Function.Function);
// We might get back another placeholder expression if we resolved to a
// builtin.
if (!CurInit.isInvalid())
CurInit = S.CheckPlaceholderExpr(CurInit.get());
break;
case SK_CastDerivedToBasePRValue:
case SK_CastDerivedToBaseXValue:
case SK_CastDerivedToBaseLValue: {
// We have a derived-to-base cast that produces either an rvalue or an
// lvalue. Perform that cast.
CXXCastPath BasePath;
// Casts to inaccessible base classes are allowed with C-style casts.
bool IgnoreBaseAccess = Kind.isCStyleOrFunctionalCast();
if (S.CheckDerivedToBaseConversion(
SourceType, Step->Type, CurInit.get()->getBeginLoc(),
CurInit.get()->getSourceRange(), &BasePath, IgnoreBaseAccess))
return ExprError();
ExprValueKind VK =
Step->Kind == SK_CastDerivedToBaseLValue
? VK_LValue
: (Step->Kind == SK_CastDerivedToBaseXValue ? VK_XValue
: VK_PRValue);
CurInit = ImplicitCastExpr::Create(S.Context, Step->Type,
CK_DerivedToBase, CurInit.get(),
&BasePath, VK, FPOptionsOverride());
break;
}
case SK_BindReference:
// Reference binding does not have any corresponding ASTs.
// Check exception specifications
if (S.CheckExceptionSpecCompatibility(CurInit.get(), DestType))
return ExprError();
// We don't check for e.g. function pointers here, since address
// availability checks should only occur when the function first decays
// into a pointer or reference.
if (CurInit.get()->getType()->isFunctionProtoType()) {
if (auto *DRE = dyn_cast<DeclRefExpr>(CurInit.get()->IgnoreParens())) {
if (auto *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
if (!S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true,
DRE->getBeginLoc()))
return ExprError();
}
}
}
CheckForNullPointerDereference(S, CurInit.get());
break;
case SK_BindReferenceToTemporary: {
// Make sure the "temporary" is actually an rvalue.
assert(CurInit.get()->isPRValue() && "not a temporary");
// Check exception specifications
if (S.CheckExceptionSpecCompatibility(CurInit.get(), DestType))
return ExprError();
QualType MTETy = Step->Type;
// When this is an incomplete array type (such as when this is
// initializing an array of unknown bounds from an init list), use THAT
// type instead so that we propagate the array bounds.
if (MTETy->isIncompleteArrayType() &&
!CurInit.get()->getType()->isIncompleteArrayType() &&
S.Context.hasSameType(
MTETy->getPointeeOrArrayElementType(),
CurInit.get()->getType()->getPointeeOrArrayElementType()))
MTETy = CurInit.get()->getType();
// Materialize the temporary into memory.
MaterializeTemporaryExpr *MTE = S.CreateMaterializeTemporaryExpr(
MTETy, CurInit.get(), Entity.getType()->isLValueReferenceType());
CurInit = MTE;
      // If we're extending this temporary to automatic storage duration, we
      // need to register its cleanup during the full-expression's cleanups.
if (MTE->getStorageDuration() == SD_Automatic &&
MTE->getType().isDestructedType())
S.Cleanup.setExprNeedsCleanups(true);
break;
}
case SK_FinalCopy:
if (checkAbstractType(Step->Type))
return ExprError();
// If the overall initialization is initializing a temporary, we already
// bound our argument if it was necessary to do so. If not (if we're
// ultimately initializing a non-temporary), our argument needs to be
// bound since it's initializing a function parameter.
// FIXME: This is a mess. Rationalize temporary destruction.
if (!shouldBindAsTemporary(Entity))
CurInit = S.MaybeBindToTemporary(CurInit.get());
CurInit = CopyObject(S, Step->Type, Entity, CurInit,
/*IsExtraneousCopy=*/false);
break;
case SK_ExtraneousCopyToTemporary:
CurInit = CopyObject(S, Step->Type, Entity, CurInit,
/*IsExtraneousCopy=*/true);
break;
case SK_UserConversion: {
// We have a user-defined conversion that invokes either a constructor
// or a conversion function.
CastKind CastKind;
FunctionDecl *Fn = Step->Function.Function;
DeclAccessPair FoundFn = Step->Function.FoundDecl;
bool HadMultipleCandidates = Step->Function.HadMultipleCandidates;
bool CreatedObject = false;
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Fn)) {
// Build a call to the selected constructor.
SmallVector<Expr*, 8> ConstructorArgs;
SourceLocation Loc = CurInit.get()->getBeginLoc();
// Determine the arguments required to actually perform the constructor
// call.
Expr *Arg = CurInit.get();
if (S.CompleteConstructorCall(Constructor, Step->Type,
MultiExprArg(&Arg, 1), Loc,
ConstructorArgs))
return ExprError();
// Build an expression that constructs a temporary.
CurInit = S.BuildCXXConstructExpr(Loc, Step->Type,
FoundFn, Constructor,
ConstructorArgs,
HadMultipleCandidates,
/*ListInit*/ false,
/*StdInitListInit*/ false,
/*ZeroInit*/ false,
CXXConstructExpr::CK_Complete,
SourceRange());
if (CurInit.isInvalid())
return ExprError();
S.CheckConstructorAccess(Kind.getLocation(), Constructor, FoundFn,
Entity);
if (S.DiagnoseUseOfDecl(FoundFn, Kind.getLocation()))
return ExprError();
CastKind = CK_ConstructorConversion;
CreatedObject = true;
} else {
// Build a call to the conversion function.
CXXConversionDecl *Conversion = cast<CXXConversionDecl>(Fn);
S.CheckMemberOperatorAccess(Kind.getLocation(), CurInit.get(), nullptr,
FoundFn);
if (S.DiagnoseUseOfDecl(FoundFn, Kind.getLocation()))
return ExprError();
CurInit = S.BuildCXXMemberCallExpr(CurInit.get(), FoundFn, Conversion,
HadMultipleCandidates);
if (CurInit.isInvalid())
return ExprError();
CastKind = CK_UserDefinedConversion;
CreatedObject = Conversion->getReturnType()->isRecordType();
}
if (CreatedObject && checkAbstractType(CurInit.get()->getType()))
return ExprError();
CurInit = ImplicitCastExpr::Create(
S.Context, CurInit.get()->getType(), CastKind, CurInit.get(), nullptr,
CurInit.get()->getValueKind(), S.CurFPFeatureOverrides());
if (shouldBindAsTemporary(Entity))
// The overall entity is temporary, so this expression should be
// destroyed at the end of its full-expression.
CurInit = S.MaybeBindToTemporary(CurInit.getAs<Expr>());
else if (CreatedObject && shouldDestroyEntity(Entity)) {
// The object outlasts the full-expression, but we need to prepare for
// a destructor being run on it.
// FIXME: It makes no sense to do this here. This should happen
// regardless of how we initialized the entity.
QualType T = CurInit.get()->getType();
if (const RecordType *Record = T->getAs<RecordType>()) {
CXXDestructorDecl *Destructor
= S.LookupDestructor(cast<CXXRecordDecl>(Record->getDecl()));
S.CheckDestructorAccess(CurInit.get()->getBeginLoc(), Destructor,
S.PDiag(diag::err_access_dtor_temp) << T);
S.MarkFunctionReferenced(CurInit.get()->getBeginLoc(), Destructor);
if (S.DiagnoseUseOfDecl(Destructor, CurInit.get()->getBeginLoc()))
return ExprError();
}
}
break;
}
case SK_QualificationConversionLValue:
case SK_QualificationConversionXValue:
case SK_QualificationConversionPRValue: {
// Perform a qualification conversion; these can never go wrong.
ExprValueKind VK =
Step->Kind == SK_QualificationConversionLValue
? VK_LValue
: (Step->Kind == SK_QualificationConversionXValue ? VK_XValue
: VK_PRValue);
CurInit = S.PerformQualificationConversion(CurInit.get(), Step->Type, VK);
break;
}
case SK_FunctionReferenceConversion:
assert(CurInit.get()->isLValue() &&
"function reference should be lvalue");
CurInit =
S.ImpCastExprToType(CurInit.get(), Step->Type, CK_NoOp, VK_LValue);
break;
case SK_AtomicConversion: {
assert(CurInit.get()->isPRValue() && "cannot convert glvalue to atomic");
CurInit = S.ImpCastExprToType(CurInit.get(), Step->Type,
CK_NonAtomicToAtomic, VK_PRValue);
break;
}
case SK_ConversionSequence:
case SK_ConversionSequenceNoNarrowing: {
if (const auto *FromPtrType =
CurInit.get()->getType()->getAs<PointerType>()) {
if (const auto *ToPtrType = Step->Type->getAs<PointerType>()) {
if (FromPtrType->getPointeeType()->hasAttr(attr::NoDeref) &&
!ToPtrType->getPointeeType()->hasAttr(attr::NoDeref)) {
// Do not check static casts here because they are checked earlier
// in Sema::ActOnCXXNamedCast()
if (!Kind.isStaticCast()) {
S.Diag(CurInit.get()->getExprLoc(),
diag::warn_noderef_to_dereferenceable_pointer)
<< CurInit.get()->getSourceRange();
}
}
}
}
Sema::CheckedConversionKind CCK
= Kind.isCStyleCast()? Sema::CCK_CStyleCast
: Kind.isFunctionalCast()? Sema::CCK_FunctionalCast
: Kind.isExplicitCast()? Sema::CCK_OtherCast
: Sema::CCK_ImplicitConversion;
ExprResult CurInitExprRes =
S.PerformImplicitConversion(CurInit.get(), Step->Type, *Step->ICS,
getAssignmentAction(Entity), CCK);
if (CurInitExprRes.isInvalid())
return ExprError();
S.DiscardMisalignedMemberAddress(Step->Type.getTypePtr(), CurInit.get());
CurInit = CurInitExprRes;
if (Step->Kind == SK_ConversionSequenceNoNarrowing &&
S.getLangOpts().CPlusPlus)
DiagnoseNarrowingInInitList(S, *Step->ICS, SourceType, Entity.getType(),
CurInit.get());
break;
}
case SK_ListInitialization: {
if (checkAbstractType(Step->Type))
return ExprError();
InitListExpr *InitList = cast<InitListExpr>(CurInit.get());
// If we're not initializing the top-level entity, we need to create an
// InitializeTemporary entity for our target type.
QualType Ty = Step->Type;
bool IsTemporary = !S.Context.hasSameType(Entity.getType(), Ty);
InitializedEntity TempEntity = InitializedEntity::InitializeTemporary(Ty);
InitializedEntity InitEntity = IsTemporary ? TempEntity : Entity;
InitListChecker PerformInitList(S, InitEntity,
InitList, Ty, /*VerifyOnly=*/false,
/*TreatUnavailableAsInvalid=*/false);
if (PerformInitList.HadError())
return ExprError();
// Hack: We must update *ResultType if available in order to set the
// bounds of arrays, e.g. in 'int ar[] = {1, 2, 3};'.
// Worst case: 'const int (&arref)[] = {1, 2, 3};'.
if (ResultType &&
ResultType->getNonReferenceType()->isIncompleteArrayType()) {
if ((*ResultType)->isRValueReferenceType())
Ty = S.Context.getRValueReferenceType(Ty);
else if ((*ResultType)->isLValueReferenceType())
Ty = S.Context.getLValueReferenceType(Ty,
(*ResultType)->castAs<LValueReferenceType>()->isSpelledAsLValue());
*ResultType = Ty;
}
InitListExpr *StructuredInitList =
PerformInitList.getFullyStructuredList();
CurInit.get();
CurInit = shouldBindAsTemporary(InitEntity)
? S.MaybeBindToTemporary(StructuredInitList)
: StructuredInitList;
break;
}
case SK_ConstructorInitializationFromList: {
if (checkAbstractType(Step->Type))
return ExprError();
// When an initializer list is passed for a parameter of type "reference
// to object", we don't get an EK_Temporary entity, but instead an
// EK_Parameter entity with reference type.
// FIXME: This is a hack. What we really should do is create a user
// conversion step for this case, but this makes it considerably more
// complicated. For now, this will do.
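// For illustration: a call such as 'void f(const std::vector<int> &v); f({1, 2, 3});'
// reaches this path with an EK_Parameter entity of reference type, and the
// braces are used to construct the temporary std::vector<int> that gets bound.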
InitializedEntity TempEntity = InitializedEntity::InitializeTemporary(
Entity.getType().getNonReferenceType());
bool UseTemporary = Entity.getType()->isReferenceType();
assert(Args.size() == 1 && "expected a single argument for list init");
InitListExpr *InitList = cast<InitListExpr>(Args[0]);
S.Diag(InitList->getExprLoc(), diag::warn_cxx98_compat_ctor_list_init)
<< InitList->getSourceRange();
MultiExprArg Arg(InitList->getInits(), InitList->getNumInits());
CurInit = PerformConstructorInitialization(S, UseTemporary ? TempEntity :
Entity,
Kind, Arg, *Step,
ConstructorInitRequiresZeroInit,
/*IsListInitialization*/true,
/*IsStdInitListInit*/false,
InitList->getLBraceLoc(),
InitList->getRBraceLoc());
break;
}
case SK_UnwrapInitList:
CurInit = cast<InitListExpr>(CurInit.get())->getInit(0);
break;
case SK_RewrapInitList: {
Expr *E = CurInit.get();
InitListExpr *Syntactic = Step->WrappingSyntacticList;
InitListExpr *ILE = new (S.Context) InitListExpr(S.Context,
Syntactic->getLBraceLoc(), E, Syntactic->getRBraceLoc());
ILE->setSyntacticForm(Syntactic);
ILE->setType(E->getType());
ILE->setValueKind(E->getValueKind());
CurInit = ILE;
break;
}
case SK_ConstructorInitialization:
case SK_StdInitializerListConstructorCall: {
if (checkAbstractType(Step->Type))
return ExprError();
// When an initializer list is passed for a parameter of type "reference
// to object", we don't get an EK_Temporary entity, but instead an
// EK_Parameter entity with reference type.
// FIXME: This is a hack. What we really should do is create a user
// conversion step for this case, but this makes it considerably more
// complicated. For now, this will do.
InitializedEntity TempEntity = InitializedEntity::InitializeTemporary(
Entity.getType().getNonReferenceType());
bool UseTemporary = Entity.getType()->isReferenceType();
bool IsStdInitListInit =
Step->Kind == SK_StdInitializerListConstructorCall;
Expr *Source = CurInit.get();
SourceRange Range = Kind.hasParenOrBraceRange()
? Kind.getParenOrBraceRange()
: SourceRange();
CurInit = PerformConstructorInitialization(
S, UseTemporary ? TempEntity : Entity, Kind,
Source ? MultiExprArg(Source) : Args, *Step,
ConstructorInitRequiresZeroInit,
/*IsListInitialization*/ IsStdInitListInit,
/*IsStdInitListInitialization*/ IsStdInitListInit,
/*LBraceLoc*/ Range.getBegin(),
/*RBraceLoc*/ Range.getEnd());
break;
}
case SK_ZeroInitialization: {
step_iterator NextStep = Step;
++NextStep;
if (NextStep != StepEnd &&
(NextStep->Kind == SK_ConstructorInitialization ||
NextStep->Kind == SK_ConstructorInitializationFromList)) {
// The need for zero-initialization is recorded directly into
// the call to the object's constructor within the next step.
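// e.g. for value-initialization of a class whose default constructor is not
// user-provided, the object is zero-initialized before the constructor runs.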
ConstructorInitRequiresZeroInit = true;
} else if (Kind.getKind() == InitializationKind::IK_Value &&
S.getLangOpts().CPlusPlus &&
!Kind.isImplicitValueInit()) {
TypeSourceInfo *TSInfo = Entity.getTypeSourceInfo();
if (!TSInfo)
TSInfo = S.Context.getTrivialTypeSourceInfo(Step->Type,
Kind.getRange().getBegin());
CurInit = new (S.Context) CXXScalarValueInitExpr(
Entity.getType().getNonLValueExprType(S.Context), TSInfo,
Kind.getRange().getEnd());
} else {
CurInit = new (S.Context) ImplicitValueInitExpr(Step->Type);
}
break;
}
case SK_CAssignment: {
QualType SourceType = CurInit.get()->getType();
// Save off the initial CurInit in case we need to emit a diagnostic
ExprResult InitialCurInit = CurInit;
ExprResult Result = CurInit;
Sema::AssignConvertType ConvTy =
S.CheckSingleAssignmentConstraints(Step->Type, Result, true,
Entity.getKind() == InitializedEntity::EK_Parameter_CF_Audited);
if (Result.isInvalid())
return ExprError();
CurInit = Result;
// If this is a call, allow conversion to a transparent union.
ExprResult CurInitExprRes = CurInit;
if (ConvTy != Sema::Compatible &&
Entity.isParameterKind() &&
S.CheckTransparentUnionArgumentConstraints(Step->Type, CurInitExprRes)
== Sema::Compatible)
ConvTy = Sema::Compatible;
if (CurInitExprRes.isInvalid())
return ExprError();
CurInit = CurInitExprRes;
bool Complained;
if (S.DiagnoseAssignmentResult(ConvTy, Kind.getLocation(),
Step->Type, SourceType,
InitialCurInit.get(),
getAssignmentAction(Entity, true),
&Complained)) {
PrintInitLocationNote(S, Entity);
return ExprError();
} else if (Complained)
PrintInitLocationNote(S, Entity);
break;
}
case SK_StringInit: {
QualType Ty = Step->Type;
bool UpdateType = ResultType && Entity.getType()->isIncompleteArrayType();
CheckStringInit(CurInit.get(), UpdateType ? *ResultType : Ty,
S.Context.getAsArrayType(Ty), S);
break;
}
case SK_ObjCObjectConversion:
CurInit = S.ImpCastExprToType(CurInit.get(), Step->Type,
CK_ObjCObjectLValueCast,
CurInit.get()->getValueKind());
break;
case SK_ArrayLoopIndex: {
Expr *Cur = CurInit.get();
Expr *BaseExpr = new (S.Context)
OpaqueValueExpr(Cur->getExprLoc(), Cur->getType(),
Cur->getValueKind(), Cur->getObjectKind(), Cur);
Expr *IndexExpr =
new (S.Context) ArrayInitIndexExpr(S.Context.getSizeType());
CurInit = S.CreateBuiltinArraySubscriptExpr(
BaseExpr, Kind.getLocation(), IndexExpr, Kind.getLocation());
ArrayLoopCommonExprs.push_back(BaseExpr);
break;
}
case SK_ArrayLoopInit: {
assert(!ArrayLoopCommonExprs.empty() &&
"mismatched SK_ArrayLoopIndex and SK_ArrayLoopInit");
Expr *Common = ArrayLoopCommonExprs.pop_back_val();
CurInit = new (S.Context) ArrayInitLoopExpr(Step->Type, Common,
CurInit.get());
break;
}
case SK_GNUArrayInit:
// Okay: we checked everything before creating this step. Note that
// this is a GNU extension.
S.Diag(Kind.getLocation(), diag::ext_array_init_copy)
<< Step->Type << CurInit.get()->getType()
<< CurInit.get()->getSourceRange();
updateGNUCompoundLiteralRValue(CurInit.get());
[[fallthrough]];
case SK_ArrayInit:
// If the destination type is an incomplete array type, update the
// type accordingly.
if (ResultType) {
if (const IncompleteArrayType *IncompleteDest
= S.Context.getAsIncompleteArrayType(Step->Type)) {
if (const ConstantArrayType *ConstantSource
= S.Context.getAsConstantArrayType(CurInit.get()->getType())) {
*ResultType = S.Context.getConstantArrayType(
IncompleteDest->getElementType(),
ConstantSource->getSize(),
ConstantSource->getSizeExpr(),
ArrayType::Normal, 0);
}
}
}
break;
case SK_ParenthesizedArrayInit:
// Okay: we checked everything before creating this step. Note that
// this is a GNU extension.
S.Diag(Kind.getLocation(), diag::ext_array_init_parens)
<< CurInit.get()->getSourceRange();
break;
case SK_PassByIndirectCopyRestore:
case SK_PassByIndirectRestore:
checkIndirectCopyRestoreSource(S, CurInit.get());
CurInit = new (S.Context) ObjCIndirectCopyRestoreExpr(
CurInit.get(), Step->Type,
Step->Kind == SK_PassByIndirectCopyRestore);
break;
case SK_ProduceObjCObject:
CurInit = ImplicitCastExpr::Create(
S.Context, Step->Type, CK_ARCProduceObject, CurInit.get(), nullptr,
VK_PRValue, FPOptionsOverride());
break;
case SK_StdInitializerList: {
S.Diag(CurInit.get()->getExprLoc(),
diag::warn_cxx98_compat_initializer_list_init)
<< CurInit.get()->getSourceRange();
// Materialize the temporary into memory.
MaterializeTemporaryExpr *MTE = S.CreateMaterializeTemporaryExpr(
CurInit.get()->getType(), CurInit.get(),
/*BoundToLvalueReference=*/false);
// Wrap it in a construction of a std::initializer_list<T>.
CurInit = new (S.Context) CXXStdInitializerListExpr(Step->Type, MTE);
// Bind the result, in case the library has given initializer_list a
// non-trivial destructor.
if (shouldBindAsTemporary(Entity))
CurInit = S.MaybeBindToTemporary(CurInit.get());
break;
}
case SK_OCLSamplerInit: {
// Sampler initialization has 5 cases:
// 1. function argument passing
// 1a. argument is a file-scope variable
// 1b. argument is a function-scope variable
// 1c. argument is one of caller function's parameters
// 2. variable initialization
// 2a. initializing a file-scope variable
// 2b. initializing a function-scope variable
//
// For file-scope variables, since they cannot be initialized by function
// call of __translate_sampler_initializer in LLVM IR, their references
// need to be replaced by a cast from their literal initializers to
// sampler type. Since sampler variables can only be used in function
// calls as arguments, we only need to replace them when handling the
// argument passing.
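// A sketch of case 1a (OpenCL C, illustrative):
//   constant sampler_t smp = CLK_NORMALIZED_COORDS_FALSE |
//                            CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
//   ... read_imagef(img, smp, coord) ...
// The reference to 'smp' in the call is rewritten into a cast from its
// literal initializer to sampler type.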
assert(Step->Type->isSamplerT() &&
"Sampler initialization on non-sampler type.");
Expr *Init = CurInit.get()->IgnoreParens();
QualType SourceType = Init->getType();
// Case 1
if (Entity.isParameterKind()) {
if (!SourceType->isSamplerT() && !SourceType->isIntegerType()) {
S.Diag(Kind.getLocation(), diag::err_sampler_argument_required)
<< SourceType;
break;
} else if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Init)) {
auto Var = cast<VarDecl>(DRE->getDecl());
// Case 1b and 1c
// No cast from integer to sampler is needed.
if (!Var->hasGlobalStorage()) {
CurInit = ImplicitCastExpr::Create(
S.Context, Step->Type, CK_LValueToRValue, Init,
/*BasePath=*/nullptr, VK_PRValue, FPOptionsOverride());
break;
}
// Case 1a
// For function call with a file-scope sampler variable as argument,
// get the integer literal.
// Do not diagnose if the file-scope variable does not have an initializer
// since this has already been diagnosed when parsing the variable
// declaration.
if (!Var->getInit() || !isa<ImplicitCastExpr>(Var->getInit()))
break;
Init = cast<ImplicitCastExpr>(const_cast<Expr*>(
Var->getInit()))->getSubExpr();
SourceType = Init->getType();
}
} else {
// Case 2
// Check that the initializer is a 32-bit integer constant.
// If the initializer is taken from a global variable, do not diagnose since
// this has already been done when parsing the variable declaration.
if (!Init->isConstantInitializer(S.Context, false))
break;
if (!SourceType->isIntegerType() ||
32 != S.Context.getIntWidth(SourceType)) {
S.Diag(Kind.getLocation(), diag::err_sampler_initializer_not_integer)
<< SourceType;
break;
}
Expr::EvalResult EVResult;
Init->EvaluateAsInt(EVResult, S.Context);
llvm::APSInt Result = EVResult.Val.getInt();
const uint64_t SamplerValue = Result.getLimitedValue();
// 32-bit value of sampler's initializer is interpreted as
// bit-field with the following structure:
// |unspecified|Filter|Addressing Mode| Normalized Coords|
// |31        6|5    4|3             1|                 0|
// This structure corresponds to enum values of sampler properties
// defined in SPIR spec v1.2 and also opencl-c.h
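// e.g. an initializer of 0x13 decodes under the masks below as Filter Mode 1,
// Addressing Mode 1 and normalized coordinates enabled.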
unsigned AddressingMode = (0x0E & SamplerValue) >> 1;
unsigned FilterMode = (0x30 & SamplerValue) >> 4;
if (FilterMode != 1 && FilterMode != 2 &&
!S.getOpenCLOptions().isAvailableOption(
"cl_intel_device_side_avc_motion_estimation", S.getLangOpts()))
S.Diag(Kind.getLocation(),
diag::warn_sampler_initializer_invalid_bits)
<< "Filter Mode";
if (AddressingMode > 4)
S.Diag(Kind.getLocation(),
diag::warn_sampler_initializer_invalid_bits)
<< "Addressing Mode";
}
// Cases 1a, 2a and 2b
// Insert cast from integer to sampler.
CurInit = S.ImpCastExprToType(Init, S.Context.OCLSamplerTy,
CK_IntToOCLSampler);
break;
}
case SK_OCLZeroOpaqueType: {
assert((Step->Type->isEventT() || Step->Type->isQueueT() ||
Step->Type->isOCLIntelSubgroupAVCType()) &&
"Wrong type for initialization of OpenCL opaque type.");
CurInit = S.ImpCastExprToType(CurInit.get(), Step->Type,
CK_ZeroToOCLOpaqueType,
CurInit.get()->getValueKind());
break;
}
case SK_ParenthesizedListInit: {
CurInit = nullptr;
TryOrBuildParenListInitialization(S, Entity, Kind, Args, *this,
/*VerifyOnly=*/false, &CurInit);
if (CurInit.get() && ResultType)
*ResultType = CurInit.get()->getType();
if (shouldBindAsTemporary(Entity))
CurInit = S.MaybeBindToTemporary(CurInit.get());
break;
}
}
}
// Check whether the initializer has a shorter lifetime than the initialized
// entity, and if not, either lifetime-extend or warn as appropriate.
if (auto *Init = CurInit.get())
S.checkInitializerLifetime(Entity, Init);
// Diagnose non-fatal problems with the completed initialization.
- if (Entity.getKind() == InitializedEntity::EK_Member &&
+ if (InitializedEntity::EntityKind EK = Entity.getKind();
+ (EK == InitializedEntity::EK_Member ||
+ EK == InitializedEntity::EK_ParenAggInitMember) &&
cast<FieldDecl>(Entity.getDecl())->isBitField())
S.CheckBitFieldInitialization(Kind.getLocation(),
cast<FieldDecl>(Entity.getDecl()),
CurInit.get());
// Check for std::move on construction.
if (const Expr *E = CurInit.get()) {
CheckMoveOnConstruction(S, E,
Entity.getKind() == InitializedEntity::EK_Result);
}
return CurInit;
}
/// Somewhere within T there is an uninitialized reference subobject.
/// Dig it out and diagnose it.
static bool DiagnoseUninitializedReference(Sema &S, SourceLocation Loc,
QualType T) {
if (T->isReferenceType()) {
S.Diag(Loc, diag::err_reference_without_init)
<< T.getNonReferenceType();
return true;
}
CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
if (!RD || !RD->hasUninitializedReferenceMember())
return false;
for (const auto *FI : RD->fields()) {
if (FI->isUnnamedBitfield())
continue;
if (DiagnoseUninitializedReference(S, FI->getLocation(), FI->getType())) {
S.Diag(Loc, diag::note_value_initialization_here) << RD;
return true;
}
}
for (const auto &BI : RD->bases()) {
if (DiagnoseUninitializedReference(S, BI.getBeginLoc(), BI.getType())) {
S.Diag(Loc, diag::note_value_initialization_here) << RD;
return true;
}
}
return false;
}
//===----------------------------------------------------------------------===//
// Diagnose initialization failures
//===----------------------------------------------------------------------===//
/// Emit notes associated with an initialization that failed due to a
/// "simple" conversion failure.
static void emitBadConversionNotes(Sema &S, const InitializedEntity &entity,
Expr *op) {
QualType destType = entity.getType();
if (destType.getNonReferenceType()->isObjCObjectPointerType() &&
op->getType()->isObjCObjectPointerType()) {
// Emit a possible note about the conversion failing because the
// operand is a message send with a related result type.
S.EmitRelatedResultTypeNote(op);
// Emit a possible note about a return failing because we're
// expecting a related result type.
if (entity.getKind() == InitializedEntity::EK_Result)
S.EmitRelatedResultTypeNoteForReturn(destType);
}
QualType fromType = op->getType();
QualType fromPointeeType = fromType.getCanonicalType()->getPointeeType();
QualType destPointeeType = destType.getCanonicalType()->getPointeeType();
auto *fromDecl = fromType->getPointeeCXXRecordDecl();
auto *destDecl = destType->getPointeeCXXRecordDecl();
if (fromDecl && destDecl && fromDecl->getDeclKind() == Decl::CXXRecord &&
destDecl->getDeclKind() == Decl::CXXRecord &&
!fromDecl->isInvalidDecl() && !destDecl->isInvalidDecl() &&
!fromDecl->hasDefinition() &&
destPointeeType.getQualifiers().compatiblyIncludes(
fromPointeeType.getQualifiers()))
S.Diag(fromDecl->getLocation(), diag::note_forward_class_conversion)
<< S.getASTContext().getTagDeclType(fromDecl)
<< S.getASTContext().getTagDeclType(destDecl);
}
static void diagnoseListInit(Sema &S, const InitializedEntity &Entity,
InitListExpr *InitList) {
QualType DestType = Entity.getType();
QualType E;
if (S.getLangOpts().CPlusPlus11 && S.isStdInitializerList(DestType, &E)) {
QualType ArrayType = S.Context.getConstantArrayType(
E.withConst(),
llvm::APInt(S.Context.getTypeSize(S.Context.getSizeType()),
InitList->getNumInits()),
nullptr, clang::ArrayType::Normal, 0);
InitializedEntity HiddenArray =
InitializedEntity::InitializeTemporary(ArrayType);
return diagnoseListInit(S, HiddenArray, InitList);
}
if (DestType->isReferenceType()) {
// A list-initialization failure for a reference means that we tried to
// create a temporary of the inner type (per [dcl.init.list]p3.6) and the
// inner initialization failed.
QualType T = DestType->castAs<ReferenceType>()->getPointeeType();
diagnoseListInit(S, InitializedEntity::InitializeTemporary(T), InitList);
SourceLocation Loc = InitList->getBeginLoc();
if (auto *D = Entity.getDecl())
Loc = D->getLocation();
S.Diag(Loc, diag::note_in_reference_temporary_list_initializer) << T;
return;
}
InitListChecker DiagnoseInitList(S, Entity, InitList, DestType,
/*VerifyOnly=*/false,
/*TreatUnavailableAsInvalid=*/false);
assert(DiagnoseInitList.HadError() &&
"Inconsistent init list check result.");
}
bool InitializationSequence::Diagnose(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
ArrayRef<Expr *> Args) {
if (!Failed())
return false;
// When we want to diagnose only one element of a braced-init-list,
// we need to factor it out.
Expr *OnlyArg;
if (Args.size() == 1) {
auto *List = dyn_cast<InitListExpr>(Args[0]);
if (List && List->getNumInits() == 1)
OnlyArg = List->getInit(0);
else
OnlyArg = Args[0];
}
else
OnlyArg = nullptr;
QualType DestType = Entity.getType();
switch (Failure) {
case FK_TooManyInitsForReference:
// FIXME: Customize for the initialized entity?
if (Args.empty()) {
// Dig out the reference subobject which is uninitialized and diagnose it.
// If this is value-initialization, this could be nested some way within
// the target type.
assert(Kind.getKind() == InitializationKind::IK_Value ||
DestType->isReferenceType());
bool Diagnosed =
DiagnoseUninitializedReference(S, Kind.getLocation(), DestType);
assert(Diagnosed && "couldn't find uninitialized reference to diagnose");
(void)Diagnosed;
} else // FIXME: diagnostic below could be better!
S.Diag(Kind.getLocation(), diag::err_reference_has_multiple_inits)
<< SourceRange(Args.front()->getBeginLoc(), Args.back()->getEndLoc());
break;
case FK_ParenthesizedListInitForReference:
S.Diag(Kind.getLocation(), diag::err_list_init_in_parens)
<< 1 << Entity.getType() << Args[0]->getSourceRange();
break;
case FK_ArrayNeedsInitList:
S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 0;
break;
case FK_ArrayNeedsInitListOrStringLiteral:
S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 1;
break;
case FK_ArrayNeedsInitListOrWideStringLiteral:
S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 2;
break;
case FK_NarrowStringIntoWideCharArray:
S.Diag(Kind.getLocation(), diag::err_array_init_narrow_string_into_wchar);
break;
case FK_WideStringIntoCharArray:
S.Diag(Kind.getLocation(), diag::err_array_init_wide_string_into_char);
break;
case FK_IncompatWideStringIntoWideChar:
S.Diag(Kind.getLocation(),
diag::err_array_init_incompat_wide_string_into_wchar);
break;
case FK_PlainStringIntoUTF8Char:
S.Diag(Kind.getLocation(),
diag::err_array_init_plain_string_into_char8_t);
S.Diag(Args.front()->getBeginLoc(),
diag::note_array_init_plain_string_into_char8_t)
<< FixItHint::CreateInsertion(Args.front()->getBeginLoc(), "u8");
break;
case FK_UTF8StringIntoPlainChar:
S.Diag(Kind.getLocation(), diag::err_array_init_utf8_string_into_char)
<< DestType->isSignedIntegerType() << S.getLangOpts().CPlusPlus20;
break;
case FK_ArrayTypeMismatch:
case FK_NonConstantArrayInit:
S.Diag(Kind.getLocation(),
(Failure == FK_ArrayTypeMismatch
? diag::err_array_init_different_type
: diag::err_array_init_non_constant_array))
<< DestType.getNonReferenceType()
<< OnlyArg->getType()
<< Args[0]->getSourceRange();
break;
case FK_VariableLengthArrayHasInitializer:
S.Diag(Kind.getLocation(), diag::err_variable_object_no_init)
<< Args[0]->getSourceRange();
break;
case FK_AddressOfOverloadFailed: {
DeclAccessPair Found;
S.ResolveAddressOfOverloadedFunction(OnlyArg,
DestType.getNonReferenceType(),
true,
Found);
break;
}
case FK_AddressOfUnaddressableFunction: {
auto *FD = cast<FunctionDecl>(cast<DeclRefExpr>(OnlyArg)->getDecl());
S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true,
OnlyArg->getBeginLoc());
break;
}
case FK_ReferenceInitOverloadFailed:
case FK_UserConversionOverloadFailed:
switch (FailedOverloadResult) {
case OR_Ambiguous:
FailedCandidateSet.NoteCandidates(
PartialDiagnosticAt(
Kind.getLocation(),
Failure == FK_UserConversionOverloadFailed
? (S.PDiag(diag::err_typecheck_ambiguous_condition)
<< OnlyArg->getType() << DestType
<< Args[0]->getSourceRange())
: (S.PDiag(diag::err_ref_init_ambiguous)
<< DestType << OnlyArg->getType()
<< Args[0]->getSourceRange())),
S, OCD_AmbiguousCandidates, Args);
break;
case OR_No_Viable_Function: {
auto Cands = FailedCandidateSet.CompleteCandidates(S, OCD_AllCandidates, Args);
if (!S.RequireCompleteType(Kind.getLocation(),
DestType.getNonReferenceType(),
diag::err_typecheck_nonviable_condition_incomplete,
OnlyArg->getType(), Args[0]->getSourceRange()))
S.Diag(Kind.getLocation(), diag::err_typecheck_nonviable_condition)
<< (Entity.getKind() == InitializedEntity::EK_Result)
<< OnlyArg->getType() << Args[0]->getSourceRange()
<< DestType.getNonReferenceType();
FailedCandidateSet.NoteCandidates(S, Args, Cands);
break;
}
case OR_Deleted: {
S.Diag(Kind.getLocation(), diag::err_typecheck_deleted_function)
<< OnlyArg->getType() << DestType.getNonReferenceType()
<< Args[0]->getSourceRange();
OverloadCandidateSet::iterator Best;
OverloadingResult Ovl
= FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
if (Ovl == OR_Deleted) {
S.NoteDeletedFunction(Best->Function);
} else {
llvm_unreachable("Inconsistent overload resolution?");
}
break;
}
case OR_Success:
llvm_unreachable("Conversion did not fail!");
}
break;
case FK_NonConstLValueReferenceBindingToTemporary:
if (isa<InitListExpr>(Args[0])) {
S.Diag(Kind.getLocation(),
diag::err_lvalue_reference_bind_to_initlist)
<< DestType.getNonReferenceType().isVolatileQualified()
<< DestType.getNonReferenceType()
<< Args[0]->getSourceRange();
break;
}
[[fallthrough]];
case FK_NonConstLValueReferenceBindingToUnrelated:
S.Diag(Kind.getLocation(),
Failure == FK_NonConstLValueReferenceBindingToTemporary
? diag::err_lvalue_reference_bind_to_temporary
: diag::err_lvalue_reference_bind_to_unrelated)
<< DestType.getNonReferenceType().isVolatileQualified()
<< DestType.getNonReferenceType()
<< OnlyArg->getType()
<< Args[0]->getSourceRange();
break;
case FK_NonConstLValueReferenceBindingToBitfield: {
// We don't necessarily have an unambiguous source bit-field.
FieldDecl *BitField = Args[0]->getSourceBitField();
S.Diag(Kind.getLocation(), diag::err_reference_bind_to_bitfield)
<< DestType.isVolatileQualified()
<< (BitField ? BitField->getDeclName() : DeclarationName())
<< (BitField != nullptr)
<< Args[0]->getSourceRange();
if (BitField)
S.Diag(BitField->getLocation(), diag::note_bitfield_decl);
break;
}
case FK_NonConstLValueReferenceBindingToVectorElement:
S.Diag(Kind.getLocation(), diag::err_reference_bind_to_vector_element)
<< DestType.isVolatileQualified()
<< Args[0]->getSourceRange();
break;
case FK_NonConstLValueReferenceBindingToMatrixElement:
S.Diag(Kind.getLocation(), diag::err_reference_bind_to_matrix_element)
<< DestType.isVolatileQualified() << Args[0]->getSourceRange();
break;
case FK_RValueReferenceBindingToLValue:
S.Diag(Kind.getLocation(), diag::err_lvalue_to_rvalue_ref)
<< DestType.getNonReferenceType() << OnlyArg->getType()
<< Args[0]->getSourceRange();
break;
case FK_ReferenceAddrspaceMismatchTemporary:
S.Diag(Kind.getLocation(), diag::err_reference_bind_temporary_addrspace)
<< DestType << Args[0]->getSourceRange();
break;
case FK_ReferenceInitDropsQualifiers: {
QualType SourceType = OnlyArg->getType();
QualType NonRefType = DestType.getNonReferenceType();
Qualifiers DroppedQualifiers =
SourceType.getQualifiers() - NonRefType.getQualifiers();
if (!NonRefType.getQualifiers().isAddressSpaceSupersetOf(
SourceType.getQualifiers()))
S.Diag(Kind.getLocation(), diag::err_reference_bind_drops_quals)
<< NonRefType << SourceType << 1 /*addr space*/
<< Args[0]->getSourceRange();
else if (DroppedQualifiers.hasQualifiers())
S.Diag(Kind.getLocation(), diag::err_reference_bind_drops_quals)
<< NonRefType << SourceType << 0 /*cv quals*/
<< Qualifiers::fromCVRMask(DroppedQualifiers.getCVRQualifiers())
<< DroppedQualifiers.getCVRQualifiers() << Args[0]->getSourceRange();
else
// FIXME: Consider decomposing the type and explaining which qualifiers
// were dropped where, or on which level a 'const' is missing, etc.
S.Diag(Kind.getLocation(), diag::err_reference_bind_drops_quals)
<< NonRefType << SourceType << 2 /*incompatible quals*/
<< Args[0]->getSourceRange();
break;
}
case FK_ReferenceInitFailed:
S.Diag(Kind.getLocation(), diag::err_reference_bind_failed)
<< DestType.getNonReferenceType()
<< DestType.getNonReferenceType()->isIncompleteType()
<< OnlyArg->isLValue()
<< OnlyArg->getType()
<< Args[0]->getSourceRange();
emitBadConversionNotes(S, Entity, Args[0]);
break;
case FK_ConversionFailed: {
QualType FromType = OnlyArg->getType();
PartialDiagnostic PDiag = S.PDiag(diag::err_init_conversion_failed)
<< (int)Entity.getKind()
<< DestType
<< OnlyArg->isLValue()
<< FromType
<< Args[0]->getSourceRange();
S.HandleFunctionTypeMismatch(PDiag, FromType, DestType);
S.Diag(Kind.getLocation(), PDiag);
emitBadConversionNotes(S, Entity, Args[0]);
break;
}
case FK_ConversionFromPropertyFailed:
// No-op. This error has already been reported.
break;
case FK_TooManyInitsForScalar: {
SourceRange R;
auto *InitList = dyn_cast<InitListExpr>(Args[0]);
if (InitList && InitList->getNumInits() >= 1) {
R = SourceRange(InitList->getInit(0)->getEndLoc(), InitList->getEndLoc());
} else {
assert(Args.size() > 1 && "Expected multiple initializers!");
R = SourceRange(Args.front()->getEndLoc(), Args.back()->getEndLoc());
}
R.setBegin(S.getLocForEndOfToken(R.getBegin()));
if (Kind.isCStyleOrFunctionalCast())
S.Diag(Kind.getLocation(), diag::err_builtin_func_cast_more_than_one_arg)
<< R;
else
S.Diag(Kind.getLocation(), diag::err_excess_initializers)
<< /*scalar=*/2 << R;
break;
}
case FK_ParenthesizedListInitForScalar:
S.Diag(Kind.getLocation(), diag::err_list_init_in_parens)
<< 0 << Entity.getType() << Args[0]->getSourceRange();
break;
case FK_ReferenceBindingToInitList:
S.Diag(Kind.getLocation(), diag::err_reference_bind_init_list)
<< DestType.getNonReferenceType() << Args[0]->getSourceRange();
break;
case FK_InitListBadDestinationType:
S.Diag(Kind.getLocation(), diag::err_init_list_bad_dest_type)
<< (DestType->isRecordType()) << DestType << Args[0]->getSourceRange();
break;
case FK_ListConstructorOverloadFailed:
case FK_ConstructorOverloadFailed: {
SourceRange ArgsRange;
if (Args.size())
ArgsRange =
SourceRange(Args.front()->getBeginLoc(), Args.back()->getEndLoc());
if (Failure == FK_ListConstructorOverloadFailed) {
assert(Args.size() == 1 &&
"List construction from other than 1 argument.");
InitListExpr *InitList = cast<InitListExpr>(Args[0]);
Args = MultiExprArg(InitList->getInits(), InitList->getNumInits());
}
// FIXME: Using "DestType" for the entity we're printing is probably
// bad.
switch (FailedOverloadResult) {
case OR_Ambiguous:
FailedCandidateSet.NoteCandidates(
PartialDiagnosticAt(Kind.getLocation(),
S.PDiag(diag::err_ovl_ambiguous_init)
<< DestType << ArgsRange),
S, OCD_AmbiguousCandidates, Args);
break;
case OR_No_Viable_Function:
if (Kind.getKind() == InitializationKind::IK_Default &&
(Entity.getKind() == InitializedEntity::EK_Base ||
- Entity.getKind() == InitializedEntity::EK_Member) &&
+ Entity.getKind() == InitializedEntity::EK_Member ||
+ Entity.getKind() == InitializedEntity::EK_ParenAggInitMember) &&
isa<CXXConstructorDecl>(S.CurContext)) {
// This is implicit default initialization of a member or
// base within a constructor. If no viable function was
// found, notify the user that they need to explicitly
// initialize this base/member.
CXXConstructorDecl *Constructor
= cast<CXXConstructorDecl>(S.CurContext);
const CXXRecordDecl *InheritedFrom = nullptr;
if (auto Inherited = Constructor->getInheritedConstructor())
InheritedFrom = Inherited.getShadowDecl()->getNominatedBaseClass();
if (Entity.getKind() == InitializedEntity::EK_Base) {
S.Diag(Kind.getLocation(), diag::err_missing_default_ctor)
<< (InheritedFrom ? 2 : Constructor->isImplicit() ? 1 : 0)
<< S.Context.getTypeDeclType(Constructor->getParent())
<< /*base=*/0
<< Entity.getType()
<< InheritedFrom;
RecordDecl *BaseDecl
= Entity.getBaseSpecifier()->getType()->castAs<RecordType>()
->getDecl();
S.Diag(BaseDecl->getLocation(), diag::note_previous_decl)
<< S.Context.getTagDeclType(BaseDecl);
} else {
S.Diag(Kind.getLocation(), diag::err_missing_default_ctor)
<< (InheritedFrom ? 2 : Constructor->isImplicit() ? 1 : 0)
<< S.Context.getTypeDeclType(Constructor->getParent())
<< /*member=*/1
<< Entity.getName()
<< InheritedFrom;
S.Diag(Entity.getDecl()->getLocation(),
diag::note_member_declared_at);
if (const RecordType *Record
= Entity.getType()->getAs<RecordType>())
S.Diag(Record->getDecl()->getLocation(),
diag::note_previous_decl)
<< S.Context.getTagDeclType(Record->getDecl());
}
break;
}
FailedCandidateSet.NoteCandidates(
PartialDiagnosticAt(
Kind.getLocation(),
S.PDiag(diag::err_ovl_no_viable_function_in_init)
<< DestType << ArgsRange),
S, OCD_AllCandidates, Args);
break;
case OR_Deleted: {
OverloadCandidateSet::iterator Best;
OverloadingResult Ovl
= FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
if (Ovl != OR_Deleted) {
S.Diag(Kind.getLocation(), diag::err_ovl_deleted_init)
<< DestType << ArgsRange;
llvm_unreachable("Inconsistent overload resolution?");
break;
}
// If this is a defaulted or implicitly-declared function, then
// it was implicitly deleted. Make it clear that the deletion was
// implicit.
if (S.isImplicitlyDeleted(Best->Function))
S.Diag(Kind.getLocation(), diag::err_ovl_deleted_special_init)
<< S.getSpecialMember(cast<CXXMethodDecl>(Best->Function))
<< DestType << ArgsRange;
else
S.Diag(Kind.getLocation(), diag::err_ovl_deleted_init)
<< DestType << ArgsRange;
S.NoteDeletedFunction(Best->Function);
break;
}
case OR_Success:
llvm_unreachable("Conversion did not fail!");
}
}
break;
case FK_DefaultInitOfConst:
if (Entity.getKind() == InitializedEntity::EK_Member &&
isa<CXXConstructorDecl>(S.CurContext)) {
// This is implicit default-initialization of a const member in
// a constructor. Complain that it needs to be explicitly
// initialized.
CXXConstructorDecl *Constructor = cast<CXXConstructorDecl>(S.CurContext);
S.Diag(Kind.getLocation(), diag::err_uninitialized_member_in_ctor)
<< (Constructor->getInheritedConstructor() ? 2 :
Constructor->isImplicit() ? 1 : 0)
<< S.Context.getTypeDeclType(Constructor->getParent())
<< /*const=*/1
<< Entity.getName();
S.Diag(Entity.getDecl()->getLocation(), diag::note_previous_decl)
<< Entity.getName();
} else if (const auto *VD = dyn_cast_if_present<VarDecl>(Entity.getDecl());
VD && VD->isConstexpr()) {
S.Diag(Kind.getLocation(), diag::err_constexpr_var_requires_const_init)
<< VD;
} else {
S.Diag(Kind.getLocation(), diag::err_default_init_const)
<< DestType << (bool)DestType->getAs<RecordType>();
}
break;
case FK_Incomplete:
S.RequireCompleteType(Kind.getLocation(), FailedIncompleteType,
diag::err_init_incomplete_type);
break;
case FK_ListInitializationFailed: {
// Run the init list checker again to emit diagnostics.
InitListExpr *InitList = cast<InitListExpr>(Args[0]);
diagnoseListInit(S, Entity, InitList);
break;
}
case FK_PlaceholderType: {
// FIXME: Already diagnosed!
break;
}
case FK_ExplicitConstructor: {
S.Diag(Kind.getLocation(), diag::err_selected_explicit_constructor)
<< Args[0]->getSourceRange();
OverloadCandidateSet::iterator Best;
OverloadingResult Ovl
= FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
(void)Ovl;
assert(Ovl == OR_Success && "Inconsistent overload resolution");
CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);
S.Diag(CtorDecl->getLocation(),
diag::note_explicit_ctor_deduction_guide_here) << false;
break;
}
case FK_ParenthesizedListInitFailed:
TryOrBuildParenListInitialization(S, Entity, Kind, Args, *this,
/*VerifyOnly=*/false);
break;
}
PrintInitLocationNote(S, Entity);
return true;
}
void InitializationSequence::dump(raw_ostream &OS) const {
switch (SequenceKind) {
case FailedSequence: {
OS << "Failed sequence: ";
switch (Failure) {
case FK_TooManyInitsForReference:
OS << "too many initializers for reference";
break;
case FK_ParenthesizedListInitForReference:
OS << "parenthesized list init for reference";
break;
case FK_ArrayNeedsInitList:
OS << "array requires initializer list";
break;
case FK_AddressOfUnaddressableFunction:
OS << "address of unaddressable function was taken";
break;
case FK_ArrayNeedsInitListOrStringLiteral:
OS << "array requires initializer list or string literal";
break;
case FK_ArrayNeedsInitListOrWideStringLiteral:
OS << "array requires initializer list or wide string literal";
break;
case FK_NarrowStringIntoWideCharArray:
OS << "narrow string into wide char array";
break;
case FK_WideStringIntoCharArray:
OS << "wide string into char array";
break;
case FK_IncompatWideStringIntoWideChar:
OS << "incompatible wide string into wide char array";
break;
case FK_PlainStringIntoUTF8Char:
OS << "plain string literal into char8_t array";
break;
case FK_UTF8StringIntoPlainChar:
OS << "u8 string literal into char array";
break;
case FK_ArrayTypeMismatch:
OS << "array type mismatch";
break;
case FK_NonConstantArrayInit:
OS << "non-constant array initializer";
break;
case FK_AddressOfOverloadFailed:
OS << "address of overloaded function failed";
break;
case FK_ReferenceInitOverloadFailed:
OS << "overload resolution for reference initialization failed";
break;
case FK_NonConstLValueReferenceBindingToTemporary:
OS << "non-const lvalue reference bound to temporary";
break;
case FK_NonConstLValueReferenceBindingToBitfield:
OS << "non-const lvalue reference bound to bit-field";
break;
case FK_NonConstLValueReferenceBindingToVectorElement:
OS << "non-const lvalue reference bound to vector element";
break;
case FK_NonConstLValueReferenceBindingToMatrixElement:
OS << "non-const lvalue reference bound to matrix element";
break;
case FK_NonConstLValueReferenceBindingToUnrelated:
OS << "non-const lvalue reference bound to unrelated type";
break;
case FK_RValueReferenceBindingToLValue:
OS << "rvalue reference bound to an lvalue";
break;
case FK_ReferenceInitDropsQualifiers:
OS << "reference initialization drops qualifiers";
break;
case FK_ReferenceAddrspaceMismatchTemporary:
OS << "reference with mismatching address space bound to temporary";
break;
case FK_ReferenceInitFailed:
OS << "reference initialization failed";
break;
case FK_ConversionFailed:
OS << "conversion failed";
break;
case FK_ConversionFromPropertyFailed:
OS << "conversion from property failed";
break;
case FK_TooManyInitsForScalar:
OS << "too many initializers for scalar";
break;
case FK_ParenthesizedListInitForScalar:
OS << "parenthesized list init for reference";
break;
case FK_ReferenceBindingToInitList:
OS << "referencing binding to initializer list";
break;
case FK_InitListBadDestinationType:
OS << "initializer list for non-aggregate, non-scalar type";
break;
case FK_UserConversionOverloadFailed:
OS << "overloading failed for user-defined conversion";
break;
case FK_ConstructorOverloadFailed:
OS << "constructor overloading failed";
break;
case FK_DefaultInitOfConst:
OS << "default initialization of a const variable";
break;
case FK_Incomplete:
OS << "initialization of incomplete type";
break;
case FK_ListInitializationFailed:
OS << "list initialization checker failure";
break;
case FK_VariableLengthArrayHasInitializer:
OS << "variable length array has an initializer";
break;
case FK_PlaceholderType:
OS << "initializer expression isn't contextually valid";
break;
case FK_ListConstructorOverloadFailed:
OS << "list constructor overloading failed";
break;
case FK_ExplicitConstructor:
OS << "list copy initialization chose explicit constructor";
break;
case FK_ParenthesizedListInitFailed:
OS << "parenthesized list initialization failed";
break;
}
OS << '\n';
return;
}
case DependentSequence:
OS << "Dependent sequence\n";
return;
case NormalSequence:
OS << "Normal sequence: ";
break;
}
for (step_iterator S = step_begin(), SEnd = step_end(); S != SEnd; ++S) {
if (S != step_begin()) {
OS << " -> ";
}
switch (S->Kind) {
case SK_ResolveAddressOfOverloadedFunction:
OS << "resolve address of overloaded function";
break;
case SK_CastDerivedToBasePRValue:
OS << "derived-to-base (prvalue)";
break;
case SK_CastDerivedToBaseXValue:
OS << "derived-to-base (xvalue)";
break;
case SK_CastDerivedToBaseLValue:
OS << "derived-to-base (lvalue)";
break;
case SK_BindReference:
OS << "bind reference to lvalue";
break;
case SK_BindReferenceToTemporary:
OS << "bind reference to a temporary";
break;
case SK_FinalCopy:
OS << "final copy in class direct-initialization";
break;
case SK_ExtraneousCopyToTemporary:
OS << "extraneous C++03 copy to temporary";
break;
case SK_UserConversion:
OS << "user-defined conversion via " << *S->Function.Function;
break;
case SK_QualificationConversionPRValue:
OS << "qualification conversion (prvalue)";
break;
case SK_QualificationConversionXValue:
OS << "qualification conversion (xvalue)";
break;
case SK_QualificationConversionLValue:
OS << "qualification conversion (lvalue)";
break;
case SK_FunctionReferenceConversion:
OS << "function reference conversion";
break;
case SK_AtomicConversion:
OS << "non-atomic-to-atomic conversion";
break;
case SK_ConversionSequence:
OS << "implicit conversion sequence (";
S->ICS->dump(); // FIXME: use OS
OS << ")";
break;
case SK_ConversionSequenceNoNarrowing:
OS << "implicit conversion sequence with narrowing prohibited (";
S->ICS->dump(); // FIXME: use OS
OS << ")";
break;
case SK_ListInitialization:
OS << "list aggregate initialization";
break;
case SK_UnwrapInitList:
OS << "unwrap reference initializer list";
break;
case SK_RewrapInitList:
OS << "rewrap reference initializer list";
break;
case SK_ConstructorInitialization:
OS << "constructor initialization";
break;
case SK_ConstructorInitializationFromList:
OS << "list initialization via constructor";
break;
case SK_ZeroInitialization:
OS << "zero initialization";
break;
case SK_CAssignment:
OS << "C assignment";
break;
case SK_StringInit:
OS << "string initialization";
break;
case SK_ObjCObjectConversion:
OS << "Objective-C object conversion";
break;
case SK_ArrayLoopIndex:
OS << "indexing for array initialization loop";
break;
case SK_ArrayLoopInit:
OS << "array initialization loop";
break;
case SK_ArrayInit:
OS << "array initialization";
break;
case SK_GNUArrayInit:
OS << "array initialization (GNU extension)";
break;
case SK_ParenthesizedArrayInit:
OS << "parenthesized array initialization";
break;
case SK_PassByIndirectCopyRestore:
OS << "pass by indirect copy and restore";
break;
case SK_PassByIndirectRestore:
OS << "pass by indirect restore";
break;
case SK_ProduceObjCObject:
OS << "Objective-C object retension";
break;
case SK_StdInitializerList:
OS << "std::initializer_list from initializer list";
break;
case SK_StdInitializerListConstructorCall:
OS << "list initialization from std::initializer_list";
break;
case SK_OCLSamplerInit:
OS << "OpenCL sampler_t from integer constant";
break;
case SK_OCLZeroOpaqueType:
OS << "OpenCL opaque type from zero";
break;
case SK_ParenthesizedListInit:
OS << "initialization from a parenthesized list of values";
break;
}
OS << " [" << S->Type << ']';
}
OS << '\n';
}
void InitializationSequence::dump() const {
dump(llvm::errs());
}
static bool NarrowingErrs(const LangOptions &L) {
return L.CPlusPlus11 &&
(!L.MicrosoftExt || L.isCompatibleWithMSVC(LangOptions::MSVC2015));
}
static void DiagnoseNarrowingInInitList(Sema &S,
const ImplicitConversionSequence &ICS,
QualType PreNarrowingType,
QualType EntityType,
const Expr *PostInit) {
const StandardConversionSequence *SCS = nullptr;
switch (ICS.getKind()) {
case ImplicitConversionSequence::StandardConversion:
SCS = &ICS.Standard;
break;
case ImplicitConversionSequence::UserDefinedConversion:
SCS = &ICS.UserDefined.After;
break;
case ImplicitConversionSequence::AmbiguousConversion:
case ImplicitConversionSequence::StaticObjectArgumentConversion:
case ImplicitConversionSequence::EllipsisConversion:
case ImplicitConversionSequence::BadConversion:
return;
}
// C++11 [dcl.init.list]p7: Check whether this is a narrowing conversion.
APValue ConstantValue;
QualType ConstantType;
switch (SCS->getNarrowingKind(S.Context, PostInit, ConstantValue,
ConstantType)) {
case NK_Not_Narrowing:
case NK_Dependent_Narrowing:
// No narrowing occurred.
return;
case NK_Type_Narrowing:
// This was a floating-to-integer conversion, which is always considered a
// narrowing conversion even if the value is a constant and can be
// represented exactly as an integer.
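// e.g. 'int i{1.0};' is diagnosed even though 1.0 is representable exactly.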
S.Diag(PostInit->getBeginLoc(), NarrowingErrs(S.getLangOpts())
? diag::ext_init_list_type_narrowing
: diag::warn_init_list_type_narrowing)
<< PostInit->getSourceRange()
<< PreNarrowingType.getLocalUnqualifiedType()
<< EntityType.getLocalUnqualifiedType();
break;
case NK_Constant_Narrowing:
// A constant value was narrowed.
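// e.g. 'char c{300};' on targets where 300 does not fit in char.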
S.Diag(PostInit->getBeginLoc(),
NarrowingErrs(S.getLangOpts())
? diag::ext_init_list_constant_narrowing
: diag::warn_init_list_constant_narrowing)
<< PostInit->getSourceRange()
<< ConstantValue.getAsString(S.getASTContext(), ConstantType)
<< EntityType.getLocalUnqualifiedType();
break;
case NK_Variable_Narrowing:
// A variable's value may have been narrowed.
S.Diag(PostInit->getBeginLoc(),
NarrowingErrs(S.getLangOpts())
? diag::ext_init_list_variable_narrowing
: diag::warn_init_list_variable_narrowing)
<< PostInit->getSourceRange()
<< PreNarrowingType.getLocalUnqualifiedType()
<< EntityType.getLocalUnqualifiedType();
break;
}
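// Offer a fixit that makes the conversion explicit, e.g. rewriting
// 'int64_t i{d};' as 'int64_t i{static_cast<int64_t>(d)};'.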
SmallString<128> StaticCast;
llvm::raw_svector_ostream OS(StaticCast);
OS << "static_cast<";
if (const TypedefType *TT = EntityType->getAs<TypedefType>()) {
// It's important to use the typedef's name if there is one so that the
// fixit doesn't break code using types like int64_t.
//
// FIXME: This will break if the typedef requires qualification. But
// getQualifiedNameAsString() includes non-machine-parsable components.
OS << *TT->getDecl();
} else if (const BuiltinType *BT = EntityType->getAs<BuiltinType>())
OS << BT->getName(S.getLangOpts());
else {
// Oops, we didn't find the actual type of the variable. Don't emit a fixit
// with a broken cast.
return;
}
OS << ">(";
S.Diag(PostInit->getBeginLoc(), diag::note_init_list_narrowing_silence)
<< PostInit->getSourceRange()
<< FixItHint::CreateInsertion(PostInit->getBeginLoc(), OS.str())
<< FixItHint::CreateInsertion(
S.getLocForEndOfToken(PostInit->getEndLoc()), ")");
}
//===----------------------------------------------------------------------===//
// Initialization helper functions
//===----------------------------------------------------------------------===//
bool
Sema::CanPerformCopyInitialization(const InitializedEntity &Entity,
ExprResult Init) {
if (Init.isInvalid())
return false;
Expr *InitE = Init.get();
assert(InitE && "No initialization expression");
InitializationKind Kind =
InitializationKind::CreateCopy(InitE->getBeginLoc(), SourceLocation());
InitializationSequence Seq(*this, Entity, Kind, InitE);
return !Seq.Failed();
}
ExprResult
Sema::PerformCopyInitialization(const InitializedEntity &Entity,
SourceLocation EqualLoc,
ExprResult Init,
bool TopLevelOfInitList,
bool AllowExplicit) {
if (Init.isInvalid())
return ExprError();
Expr *InitE = Init.get();
assert(InitE && "No initialization expression?");
if (EqualLoc.isInvalid())
EqualLoc = InitE->getBeginLoc();
InitializationKind Kind = InitializationKind::CreateCopy(
InitE->getBeginLoc(), EqualLoc, AllowExplicit);
InitializationSequence Seq(*this, Entity, Kind, InitE, TopLevelOfInitList);
// Prevent infinite recursion when performing parameter copy-initialization.
const bool ShouldTrackCopy =
Entity.isParameterKind() && Seq.isConstructorInitialization();
if (ShouldTrackCopy) {
if (llvm::is_contained(CurrentParameterCopyTypes, Entity.getType())) {
Seq.SetOverloadFailure(
InitializationSequence::FK_ConstructorOverloadFailed,
OR_No_Viable_Function);
// Try to give a meaningful diagnostic note for the problematic
// constructor.
const auto LastStep = Seq.step_end() - 1;
assert(LastStep->Kind ==
InitializationSequence::SK_ConstructorInitialization);
const FunctionDecl *Function = LastStep->Function.Function;
auto Candidate =
llvm::find_if(Seq.getFailedCandidateSet(),
[Function](const OverloadCandidate &Candidate) -> bool {
return Candidate.Viable &&
Candidate.Function == Function &&
Candidate.Conversions.size() > 0;
});
if (Candidate != Seq.getFailedCandidateSet().end() &&
Function->getNumParams() > 0) {
Candidate->Viable = false;
Candidate->FailureKind = ovl_fail_bad_conversion;
Candidate->Conversions[0].setBad(BadConversionSequence::no_conversion,
InitE,
Function->getParamDecl(0)->getType());
}
}
CurrentParameterCopyTypes.push_back(Entity.getType());
}
ExprResult Result = Seq.Perform(*this, Entity, Kind, InitE);
if (ShouldTrackCopy)
CurrentParameterCopyTypes.pop_back();
return Result;
}
/// Determine whether RD is, or is derived from, a specialization of CTD.
static bool isOrIsDerivedFromSpecializationOf(CXXRecordDecl *RD,
ClassTemplateDecl *CTD) {
auto NotSpecialization = [&] (const CXXRecordDecl *Candidate) {
auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(Candidate);
return !CTSD || !declaresSameEntity(CTSD->getSpecializedTemplate(), CTD);
};
return !(NotSpecialization(RD) && RD->forallBases(NotSpecialization));
}
QualType Sema::DeduceTemplateSpecializationFromInitializer(
TypeSourceInfo *TSInfo, const InitializedEntity &Entity,
const InitializationKind &Kind, MultiExprArg Inits) {
auto *DeducedTST = dyn_cast<DeducedTemplateSpecializationType>(
TSInfo->getType()->getContainedDeducedType());
assert(DeducedTST && "not a deduced template specialization type");
auto TemplateName = DeducedTST->getTemplateName();
if (TemplateName.isDependent())
return SubstAutoTypeDependent(TSInfo->getType());
// We can only perform deduction for class templates.
auto *Template =
dyn_cast_or_null<ClassTemplateDecl>(TemplateName.getAsTemplateDecl());
if (!Template) {
Diag(Kind.getLocation(),
diag::err_deduced_non_class_template_specialization_type)
<< (int)getTemplateNameKindForDiagnostics(TemplateName) << TemplateName;
if (auto *TD = TemplateName.getAsTemplateDecl())
Diag(TD->getLocation(), diag::note_template_decl_here);
return QualType();
}
// Can't deduce from dependent arguments.
if (Expr::hasAnyTypeDependentArguments(Inits)) {
Diag(TSInfo->getTypeLoc().getBeginLoc(),
diag::warn_cxx14_compat_class_template_argument_deduction)
<< TSInfo->getTypeLoc().getSourceRange() << 0;
return SubstAutoTypeDependent(TSInfo->getType());
}
// FIXME: Perform "exact type" matching first, per CWG discussion?
// Or implement this via an implied 'T(T) -> T' deduction guide?
// FIXME: Do we need/want a std::initializer_list<T> special case?
// Look up deduction guides, including those synthesized from constructors.
//
// C++1z [over.match.class.deduct]p1:
// A set of functions and function templates is formed comprising:
// - For each constructor of the class template designated by the
// template-name, a function template [...]
// - For each deduction-guide, a function or function template [...]
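// For example, given 'template<typename T> struct P { P(T, T); };', the set
// contains the implicit guide 'template<typename T> P(T, T) -> P<T>;', so
// 'P p(1, 2);' deduces P<int>.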
DeclarationNameInfo NameInfo(
Context.DeclarationNames.getCXXDeductionGuideName(Template),
TSInfo->getTypeLoc().getEndLoc());
LookupResult Guides(*this, NameInfo, LookupOrdinaryName);
LookupQualifiedName(Guides, Template->getDeclContext());
// FIXME: Do not diagnose inaccessible deduction guides. The standard isn't
// clear on this, but they're not found by name so access does not apply.
Guides.suppressDiagnostics();
// Figure out if this is list-initialization.
InitListExpr *ListInit =
(Inits.size() == 1 && Kind.getKind() != InitializationKind::IK_Direct)
? dyn_cast<InitListExpr>(Inits[0])
: nullptr;
// C++1z [over.match.class.deduct]p1:
// Initialization and overload resolution are performed as described in
// [dcl.init] and [over.match.ctor], [over.match.copy], or [over.match.list]
// (as appropriate for the type of initialization performed) for an object
// of a hypothetical class type, where the selected functions and function
// templates are considered to be the constructors of that class type
//
// Since we know we're initializing a class type from an initializer of an
// unrelated type, this reduces to something fairly reasonable.
OverloadCandidateSet Candidates(Kind.getLocation(),
OverloadCandidateSet::CSK_Normal);
OverloadCandidateSet::iterator Best;
bool HasAnyDeductionGuide = false;
bool AllowExplicit = !Kind.isCopyInit() || ListInit;
auto tryToResolveOverload =
[&](bool OnlyListConstructors) -> OverloadingResult {
Candidates.clear(OverloadCandidateSet::CSK_Normal);
HasAnyDeductionGuide = false;
for (auto I = Guides.begin(), E = Guides.end(); I != E; ++I) {
NamedDecl *D = (*I)->getUnderlyingDecl();
if (D->isInvalidDecl())
continue;
auto *TD = dyn_cast<FunctionTemplateDecl>(D);
auto *GD = dyn_cast_or_null<CXXDeductionGuideDecl>(
TD ? TD->getTemplatedDecl() : dyn_cast<FunctionDecl>(D));
if (!GD)
continue;
if (!GD->isImplicit())
HasAnyDeductionGuide = true;
// C++ [over.match.ctor]p1: (non-list copy-initialization from non-class)
// For copy-initialization, the candidate functions are all the
// converting constructors (12.3.1) of that class.
// C++ [over.match.copy]p1: (non-list copy-initialization from class)
// The converting constructors of T are candidate functions.
if (!AllowExplicit) {
// Overload resolution checks whether the deduction guide is declared
// explicit for us.
// When looking for a converting constructor, deduction guides that
// could never be called with one argument are not interesting to
// check or note.
if (GD->getMinRequiredArguments() > 1 ||
(GD->getNumParams() == 0 && !GD->isVariadic()))
continue;
}
// C++ [over.match.list]p1.1: (first phase list initialization)
// Initially, the candidate functions are the initializer-list
// constructors of the class T
if (OnlyListConstructors && !isInitListConstructor(GD))
continue;
// C++ [over.match.list]p1.2: (second phase list initialization)
// the candidate functions are all the constructors of the class T
// C++ [over.match.ctor]p1: (all other cases)
// the candidate functions are all the constructors of the class of
// the object being initialized
// C++ [over.best.ics]p4:
// When [...] the constructor [...] is a candidate by
// - [over.match.copy] (in all cases)
// FIXME: The "second phase of [over.match.list] case can also
// theoretically happen here, but it's not clear whether we can
// ever have a parameter of the right type.
bool SuppressUserConversions = Kind.isCopyInit();
if (TD)
AddTemplateOverloadCandidate(TD, I.getPair(), /*ExplicitArgs*/ nullptr,
Inits, Candidates, SuppressUserConversions,
/*PartialOverloading*/ false,
AllowExplicit);
else
AddOverloadCandidate(GD, I.getPair(), Inits, Candidates,
SuppressUserConversions,
/*PartialOverloading*/ false, AllowExplicit);
}
return Candidates.BestViableFunction(*this, Kind.getLocation(), Best);
};
OverloadingResult Result = OR_No_Viable_Function;
// C++11 [over.match.list]p1, per DR1467: for list-initialization, first
// try initializer-list constructors.
if (ListInit) {
bool TryListConstructors = true;
// Try list constructors unless the list is empty and the class has one or
// more default constructors, in which case those constructors win.
if (!ListInit->getNumInits()) {
for (NamedDecl *D : Guides) {
auto *FD = dyn_cast<FunctionDecl>(D->getUnderlyingDecl());
if (FD && FD->getMinRequiredArguments() == 0) {
TryListConstructors = false;
break;
}
}
} else if (ListInit->getNumInits() == 1) {
// C++ [over.match.class.deduct]:
// As an exception, the first phase in [over.match.list] (considering
// initializer-list constructors) is omitted if the initializer list
// consists of a single expression of type cv U, where U is a
// specialization of C or a class derived from a specialization of C.
Expr *E = ListInit->getInit(0);
auto *RD = E->getType()->getAsCXXRecordDecl();
if (!isa<InitListExpr>(E) && RD &&
isCompleteType(Kind.getLocation(), E->getType()) &&
isOrIsDerivedFromSpecializationOf(RD, Template))
TryListConstructors = false;
}
if (TryListConstructors)
Result = tryToResolveOverload(/*OnlyListConstructors*/true);
// Then unwrap the initializer list and try again considering all
// constructors.
Inits = MultiExprArg(ListInit->getInits(), ListInit->getNumInits());
}
// If list-initialization fails, or if we're doing any other kind of
// initialization, we (eventually) consider constructors.
if (Result == OR_No_Viable_Function)
Result = tryToResolveOverload(/*OnlyListConstructors*/false);
switch (Result) {
case OR_Ambiguous:
// FIXME: For list-initialization candidates, it'd usually be better to
// list why they were not viable when given the initializer list itself as
// an argument.
Candidates.NoteCandidates(
PartialDiagnosticAt(
Kind.getLocation(),
PDiag(diag::err_deduced_class_template_ctor_ambiguous)
<< TemplateName),
*this, OCD_AmbiguousCandidates, Inits);
return QualType();
case OR_No_Viable_Function: {
CXXRecordDecl *Primary =
cast<ClassTemplateDecl>(Template)->getTemplatedDecl();
bool Complete =
isCompleteType(Kind.getLocation(), Context.getTypeDeclType(Primary));
Candidates.NoteCandidates(
PartialDiagnosticAt(
Kind.getLocation(),
PDiag(Complete ? diag::err_deduced_class_template_ctor_no_viable
: diag::err_deduced_class_template_incomplete)
<< TemplateName << !Guides.empty()),
*this, OCD_AllCandidates, Inits);
return QualType();
}
case OR_Deleted: {
Diag(Kind.getLocation(), diag::err_deduced_class_template_deleted)
<< TemplateName;
NoteDeletedFunction(Best->Function);
return QualType();
}
case OR_Success:
// C++ [over.match.list]p1:
// In copy-list-initialization, if an explicit constructor is chosen, the
// initialization is ill-formed.
if (Kind.isCopyInit() && ListInit &&
cast<CXXDeductionGuideDecl>(Best->Function)->isExplicit()) {
bool IsDeductionGuide = !Best->Function->isImplicit();
Diag(Kind.getLocation(), diag::err_deduced_class_template_explicit)
<< TemplateName << IsDeductionGuide;
Diag(Best->Function->getLocation(),
diag::note_explicit_ctor_deduction_guide_here)
<< IsDeductionGuide;
return QualType();
}
// Make sure we didn't select an unusable deduction guide, and mark it
// as referenced.
DiagnoseUseOfDecl(Best->Function, Kind.getLocation());
MarkFunctionReferenced(Kind.getLocation(), Best->Function);
break;
}
// C++ [dcl.type.class.deduct]p1:
// The placeholder is replaced by the return type of the function selected
// by overload resolution for class template deduction.
QualType DeducedType =
SubstAutoType(TSInfo->getType(), Best->Function->getReturnType());
Diag(TSInfo->getTypeLoc().getBeginLoc(),
diag::warn_cxx14_compat_class_template_argument_deduction)
<< TSInfo->getTypeLoc().getSourceRange() << 1 << DeducedType;
// Warn if CTAD was used on a type that does not have any user-defined
// deduction guides.
if (!HasAnyDeductionGuide) {
Diag(TSInfo->getTypeLoc().getBeginLoc(),
diag::warn_ctad_maybe_unsupported)
<< TemplateName;
Diag(Template->getLocation(), diag::note_suppress_ctad_maybe_unsupported);
}
return DeducedType;
}
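// Illustrative sketch (not part of this patch): a minimal translation unit
// exercising the deduction logic above -- the two-phase handling of
// list-initialization and the single-element exception quoted from
// [over.match.class.deduct]. Assumes only the standard library and C++17.
#include <vector>
int main() {
  std::vector v1{1, 2, 3}; // phase one (initializer-list guides): vector<int>
  std::vector v2{v1};      // single-element exception: deduces vector<int>,
                           // not vector<vector<int>>
  return (v1.size() == 3 && v2.size() == 3) ? 0 : 1;
}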
diff --git a/contrib/llvm-project/libcxx/include/__config b/contrib/llvm-project/libcxx/include/__config
index 9009b9014abb..d7ba71906e26 100644
--- a/contrib/llvm-project/libcxx/include/__config
+++ b/contrib/llvm-project/libcxx/include/__config
@@ -1,1271 +1,1271 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___CONFIG
#define _LIBCPP___CONFIG
#include <__config_site>
#if defined(_MSC_VER) && !defined(__clang__)
# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# define _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
# endif
#endif
#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
# pragma GCC system_header
#endif
#if defined(__apple_build_version__)
// Given AppleClang XX.Y.Z, _LIBCPP_APPLE_CLANG_VER is XXYZ (e.g. AppleClang 14.0.3 => 1403)
# define _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_APPLE_CLANG_VER (__apple_build_version__ / 10000)
#elif defined(__clang__)
# define _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_CLANG_VER (__clang_major__ * 100 + __clang_minor__)
#elif defined(__GNUC__)
# define _LIBCPP_COMPILER_GCC
#endif
#ifdef __cplusplus
// _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
// Given an LLVM release XX.YY.ZZ (e.g. LLVM 16.0.1 == 16.00.01), _LIBCPP_VERSION is
// defined to XXYYZZ.
-# define _LIBCPP_VERSION 160004
+# define _LIBCPP_VERSION 160005
# define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
# define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
// Valid C++ identifier that revs with every libc++ version. This can be used to
// generate identifiers that must be unique for every released libc++ version.
# define _LIBCPP_VERSIONED_IDENTIFIER _LIBCPP_CONCAT(v, _LIBCPP_VERSION)
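// For example, with _LIBCPP_VERSION defined to 160005 above,
// _LIBCPP_VERSIONED_IDENTIFIER expands to the identifier v160005.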
# if __STDC_HOSTED__ == 0
# define _LIBCPP_FREESTANDING
# endif
# ifndef _LIBCPP_STD_VER
# if __cplusplus <= 201103L
# define _LIBCPP_STD_VER 11
# elif __cplusplus <= 201402L
# define _LIBCPP_STD_VER 14
# elif __cplusplus <= 201703L
# define _LIBCPP_STD_VER 17
# elif __cplusplus <= 202002L
# define _LIBCPP_STD_VER 20
# else
// Expected release year of the next C++ standard
# define _LIBCPP_STD_VER 23
# endif
# endif // _LIBCPP_STD_VER
# if defined(__ELF__)
# define _LIBCPP_OBJECT_FORMAT_ELF 1
# elif defined(__MACH__)
# define _LIBCPP_OBJECT_FORMAT_MACHO 1
# elif defined(_WIN32)
# define _LIBCPP_OBJECT_FORMAT_COFF 1
# elif defined(__wasm__)
# define _LIBCPP_OBJECT_FORMAT_WASM 1
# elif defined(_AIX)
# define _LIBCPP_OBJECT_FORMAT_XCOFF 1
# else
// ... add new file formats here ...
# endif
# if _LIBCPP_ABI_VERSION >= 2
// Change short string representation so that string data starts at offset 0,
// improving its alignment in some cases.
# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
// Fix deque iterator type in order to support incomplete types.
# define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE
// Fix undefined behavior in how std::list stores its linked nodes.
# define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB
// Fix undefined behavior in how __tree stores its end and parent nodes.
# define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB
// Fix undefined behavior in how __hash_table stores its pointer types.
# define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB
# define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB
# define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE
// Define a key function for `bad_function_call` in the library, to centralize
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
// Override the default return value of exception::what() for
// bad_function_call::what() with a string that is specific to
// bad_function_call (see http://wg21.link/LWG2233). This is an ABI break
// because it changes the vtable layout of bad_function_call.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
// Enable optimized version of __do_get_(un)signed which avoids redundant copies.
# define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
// Give reverse_iterator<T> one data member of type T, not two.
// Also, in C++17 and later, don't derive iterator types from std::iterator.
# define _LIBCPP_ABI_NO_ITERATOR_BASES
// Use the smallest possible integer type to represent the index of the variant.
// Previously libc++ used "unsigned int" exclusively.
# define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION
// Unstable attempt to provide a more optimized std::function
# define _LIBCPP_ABI_OPTIMIZED_FUNCTION
// All the regex constants must be distinct and nonzero.
# define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO
// Re-worked external template instantiations for std::string with a focus on
// performance and fast-path inlining.
# define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
// Enable clang::trivial_abi on std::unique_ptr.
# define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI
// Enable clang::trivial_abi on std::shared_ptr and std::weak_ptr
# define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI
// std::random_device holds some state when it uses an implementation that gets
// entropy from a file (see _LIBCPP_USING_DEV_RANDOM). When switching from this
// implementation to another one on a platform that has already shipped
// std::random_device, one needs to retain the same object layout to remain ABI
// compatible. This switch removes these workarounds for platforms that don't care
// about ABI compatibility.
# define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT
// Don't export the legacy __basic_string_common class and its methods from the built library.
# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON
// Don't export the legacy __vector_base_common class and its methods from the built library.
# define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON
// According to the Standard, `bitset::operator[] const` returns bool
# define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL
// Fix the implementation of CityHash used for std::hash<fundamental-type>.
// This is an ABI break because `std::hash` will return a different result,
// which means that hashing the same object in translation units built against
// different versions of libc++ can return inconsistent results. This is especially
// tricky since std::hash is used in the implementation of unordered containers.
//
// The incorrect implementation of CityHash has the problem that it drops some
// bits on the floor.
# define _LIBCPP_ABI_FIX_CITYHASH_IMPLEMENTATION
// Remove the base 10 implementation of std::to_chars from the dylib.
// The implementation moved to the header, but we still export the symbols from
// the dylib for backwards compatibility.
# define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
# elif _LIBCPP_ABI_VERSION == 1
# if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF))
// Enable compiling copies of now inline methods into the dylib to support
// applications compiled against older libraries. This is unnecessary with
// COFF dllexport semantics, since dllexport forces a non-inline definition
// of inline functions to be emitted anyway. Our own non-inline copy would
// conflict with the dllexport-emitted copy, so we disable it. For XCOFF,
// the linker will take issue with the symbols in the shared object if the
// weak inline methods get visibility (such as from -fvisibility-inlines-hidden),
// so disable it.
# define _LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS
# endif
// Feature macros for disabling pre ABI v1 features. All of these options
// are deprecated.
# if defined(__FreeBSD__)
# define _LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR
# endif
# endif
# if defined(_LIBCPP_BUILDING_LIBRARY) || _LIBCPP_ABI_VERSION >= 2
// Enable additional explicit instantiations of iostreams components. This
// reduces the number of weak definitions generated in programs that use
// iostreams by providing a single strong definition in the shared library.
# define _LIBCPP_ABI_ENABLE_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1
// Define a key function for `bad_function_call` in the library, to centralize
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
# endif
# define _LIBCPP_TOSTRING2(x) #x
# define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x)
# if __cplusplus < 201103L
# define _LIBCPP_CXX03_LANG
# endif
# ifndef __has_attribute
# define __has_attribute(__x) 0
# endif
# ifndef __has_builtin
# define __has_builtin(__x) 0
# endif
# ifndef __has_extension
# define __has_extension(__x) 0
# endif
# ifndef __has_feature
# define __has_feature(__x) 0
# endif
# ifndef __has_cpp_attribute
# define __has_cpp_attribute(__x) 0
# endif
# ifndef __has_constexpr_builtin
# define __has_constexpr_builtin(x) 0
# endif
// '__is_identifier' returns '0' if '__x' is a reserved identifier provided by
// the compiler and '1' otherwise.
# ifndef __is_identifier
# define __is_identifier(__x) 1
# endif
# ifndef __has_declspec_attribute
# define __has_declspec_attribute(__x) 0
# endif
# define __has_keyword(__x) !(__is_identifier(__x))
# ifndef __has_include
# define __has_include(...) 0
# endif
# if !defined(_LIBCPP_COMPILER_CLANG_BASED) && __cplusplus < 201103L
# error "libc++ only supports C++03 with Clang-based compilers. Please enable C++11"
# endif
// FIXME: ABI detection should be done via compiler builtin macros. This
// is just a placeholder until Clang implements such macros. For now assume
// that Windows compilers pretending to be MSVC++ target the Microsoft ABI,
// and allow the user to explicitly specify the ABI to handle cases where this
// heuristic falls short.
# if defined(_LIBCPP_ABI_FORCE_ITANIUM) && defined(_LIBCPP_ABI_FORCE_MICROSOFT)
# error "Only one of _LIBCPP_ABI_FORCE_ITANIUM and _LIBCPP_ABI_FORCE_MICROSOFT can be defined"
# elif defined(_LIBCPP_ABI_FORCE_ITANIUM)
# define _LIBCPP_ABI_ITANIUM
# elif defined(_LIBCPP_ABI_FORCE_MICROSOFT)
# define _LIBCPP_ABI_MICROSOFT
# else
# if defined(_WIN32) && defined(_MSC_VER)
# define _LIBCPP_ABI_MICROSOFT
# else
# define _LIBCPP_ABI_ITANIUM
# endif
# endif
# if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME)
# define _LIBCPP_ABI_VCRUNTIME
# endif
# if __has_feature(experimental_library)
# ifndef _LIBCPP_ENABLE_EXPERIMENTAL
# define _LIBCPP_ENABLE_EXPERIMENTAL
# endif
# endif
// Incomplete features get their own specific disabling flags. This makes it
// easier to grep for target specific flags once the feature is complete.
# if !defined(_LIBCPP_ENABLE_EXPERIMENTAL) && !defined(_LIBCPP_BUILDING_LIBRARY)
# define _LIBCPP_HAS_NO_INCOMPLETE_FORMAT
# endif
// Need to detect which libc we're using if we're on Linux.
# if defined(__linux__)
# include <features.h>
# if defined(__GLIBC_PREREQ)
# define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
# else
# define _LIBCPP_GLIBC_PREREQ(a, b) 0
# endif // defined(__GLIBC_PREREQ)
# endif // defined(__linux__)
# if defined(__MVS__)
# include <features.h> // for __NATIVE_ASCII_F
# endif
# ifdef __LITTLE_ENDIAN__
# if __LITTLE_ENDIAN__
# define _LIBCPP_LITTLE_ENDIAN
# endif // __LITTLE_ENDIAN__
# endif // __LITTLE_ENDIAN__
# ifdef __BIG_ENDIAN__
# if __BIG_ENDIAN__
# define _LIBCPP_BIG_ENDIAN
# endif // __BIG_ENDIAN__
# endif // __BIG_ENDIAN__
# ifdef __BYTE_ORDER__
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define _LIBCPP_LITTLE_ENDIAN
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define _LIBCPP_BIG_ENDIAN
# endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# endif // __BYTE_ORDER__
# ifdef __FreeBSD__
# include <sys/endian.h>
# include <osreldate.h>
# if _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else // _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# endif // _BYTE_ORDER == _LITTLE_ENDIAN
# endif // __FreeBSD__
# if defined(__NetBSD__) || defined(__OpenBSD__)
# include <sys/endian.h>
# if _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else // _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# endif // _BYTE_ORDER == _LITTLE_ENDIAN
# endif // defined(__NetBSD__) || defined(__OpenBSD__)
# if defined(_WIN32)
# define _LIBCPP_WIN32API
# define _LIBCPP_LITTLE_ENDIAN
# define _LIBCPP_SHORT_WCHAR 1
// Both MinGW and native MSVC provide a "MSVC"-like environment
# define _LIBCPP_MSVCRT_LIKE
// If MinGW is not explicitly detected, assume the MS C runtime is in use only
// if an MS compatibility version is specified.
# if defined(_MSC_VER) && !defined(__MINGW32__)
# define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library
# endif
# if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__))
# define _LIBCPP_HAS_BITSCAN64
# endif
# define _LIBCPP_HAS_OPEN_WITH_WCHAR
# endif // defined(_WIN32)
# ifdef __sun__
# include <sys/isa_defs.h>
# ifdef _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else
# define _LIBCPP_BIG_ENDIAN
# endif
# endif // __sun__
# if defined(_AIX) && !defined(__64BIT__)
// wchar_t is 2 bytes in 32-bit mode on AIX.
# define _LIBCPP_SHORT_WCHAR 1
# endif
// Libc++ supports various implementations of std::random_device.
//
// _LIBCPP_USING_DEV_RANDOM
// Read entropy from the given file, by default `/dev/urandom`.
// If a token is provided, it is assumed to be the path to a file
// to read entropy from. This is the default behavior if nothing
// else is specified. This implementation requires storing state
// inside `std::random_device`.
//
// _LIBCPP_USING_ARC4_RANDOM
// Use arc4random(). This allows obtaining random data even when
// using sandboxing mechanisms. On some platforms like Apple, this
// is the recommended source of entropy for user-space programs.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_GETENTROPY
// Use getentropy().
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_FUCHSIA_CPRNG
// Use Fuchsia's zx_cprng_draw() system call, which is specified to
// deliver high-quality entropy and cannot fail.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_NACL_RANDOM
// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access,
// including accesses to the special files under `/dev`. This implementation
// uses the NaCL syscall `nacl_secure_random_init()` to get entropy.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_WIN32_RANDOM
// Use rand_s(), for use on Windows.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__DragonFly__) || defined(__sun__)
# define _LIBCPP_USING_ARC4_RANDOM
# elif defined(__wasi__) || defined(__EMSCRIPTEN__)
# define _LIBCPP_USING_GETENTROPY
# elif defined(__Fuchsia__)
# define _LIBCPP_USING_FUCHSIA_CPRNG
# elif defined(__native_client__)
# define _LIBCPP_USING_NACL_RANDOM
# elif defined(_LIBCPP_WIN32API)
# define _LIBCPP_USING_WIN32_RANDOM
# else
# define _LIBCPP_USING_DEV_RANDOM
# endif
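// Illustrative sketch (not part of this header): user code is the same no
// matter which backend macro is selected above; only the entropy source
// differs. For example:
//   #include <random>
//   std::random_device rd;    // backed by arc4random() on FreeBSD/Apple
//   unsigned int seed = rd(); // one value of entropy from the chosen backend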
# if !defined(_LIBCPP_LITTLE_ENDIAN) && !defined(_LIBCPP_BIG_ENDIAN)
# include <endian.h>
# if __BYTE_ORDER == __LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# elif __BYTE_ORDER == __BIG_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# else // __BYTE_ORDER == __BIG_ENDIAN
# error unable to determine endian
# endif
# endif // !defined(_LIBCPP_LITTLE_ENDIAN) && !defined(_LIBCPP_BIG_ENDIAN)
# if __has_attribute(__no_sanitize__) && !defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_NO_CFI __attribute__((__no_sanitize__("cfi")))
# else
# define _LIBCPP_NO_CFI
# endif
# ifndef _LIBCPP_CXX03_LANG
# define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp)
# define _ALIGNAS_TYPE(x) alignas(x)
# define _ALIGNAS(x) alignas(x)
# define _LIBCPP_NORETURN [[noreturn]]
# define _NOEXCEPT noexcept
# define _NOEXCEPT_(x) noexcept(x)
# define _LIBCPP_CONSTEXPR constexpr
# else
# define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp)
# define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x))))
# define _ALIGNAS(x) __attribute__((__aligned__(x)))
# define _LIBCPP_NORETURN __attribute__((__noreturn__))
# define _LIBCPP_HAS_NO_NOEXCEPT
# define nullptr __nullptr
# define _NOEXCEPT throw()
# define _NOEXCEPT_(x)
# define static_assert(...) _Static_assert(__VA_ARGS__)
# define decltype(...) __decltype(__VA_ARGS__)
# define _LIBCPP_CONSTEXPR
typedef __char16_t char16_t;
typedef __char32_t char32_t;
# endif
# if !defined(__cpp_exceptions) || __cpp_exceptions < 199711L
# define _LIBCPP_NO_EXCEPTIONS
# endif
# define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp)
# if defined(_LIBCPP_COMPILER_CLANG_BASED)
# if defined(__APPLE__) && !defined(__i386__) && !defined(__x86_64__) && (!defined(__arm__) || __ARM_ARCH_7K__ >= 2)
# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
# endif
// Objective-C++ features (opt-in)
# if __has_feature(objc_arc)
# define _LIBCPP_HAS_OBJC_ARC
# endif
# if __has_feature(objc_arc_weak)
# define _LIBCPP_HAS_OBJC_ARC_WEAK
# endif
# if __has_extension(blocks)
# define _LIBCPP_HAS_EXTENSION_BLOCKS
# endif
# if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) && defined(__APPLE__)
# define _LIBCPP_HAS_BLOCKS_RUNTIME
# endif
# if !__has_feature(address_sanitizer)
# define _LIBCPP_HAS_NO_ASAN
# endif
// Allow for build-time disabling of unsigned integer sanitization
# if __has_attribute(no_sanitize)
# define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow")))
# endif
# define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__))
# define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__
# elif defined(_LIBCPP_COMPILER_GCC)
# if !defined(__SANITIZE_ADDRESS__)
# define _LIBCPP_HAS_NO_ASAN
# endif
# define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__))
# define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__
# endif // _LIBCPP_COMPILER_[CLANG|GCC]
# if defined(_LIBCPP_OBJECT_FORMAT_COFF)
# ifdef _DLL
# define _LIBCPP_CRT_FUNC __declspec(dllimport)
# else
# define _LIBCPP_CRT_FUNC
# endif
# if defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) || (defined(__MINGW32__) && !defined(_LIBCPP_BUILDING_LIBRARY))
# define _LIBCPP_DLL_VIS
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_EXPORTED_FROM_ABI
# elif defined(_LIBCPP_BUILDING_LIBRARY)
# define _LIBCPP_DLL_VIS __declspec(dllexport)
# if defined(__MINGW32__)
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# else
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS
# endif
# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllexport)
# else
# define _LIBCPP_DLL_VIS __declspec(dllimport)
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllimport)
# endif
# define _LIBCPP_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_FUNC_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_EXCEPTION_ABI _LIBCPP_DLL_VIS
# define _LIBCPP_HIDDEN
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
# define _LIBCPP_TEMPLATE_VIS
# define _LIBCPP_TEMPLATE_DATA_VIS
# define _LIBCPP_ENUM_VIS
# else
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# define _LIBCPP_VISIBILITY(vis) __attribute__((__visibility__(vis)))
# else
# define _LIBCPP_VISIBILITY(vis)
# endif
# define _LIBCPP_HIDDEN _LIBCPP_VISIBILITY("hidden")
# define _LIBCPP_FUNC_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_TYPE_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_TEMPLATE_DATA_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXCEPTION_ABI _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
// TODO: Make this a proper customization point or remove the option to override it.
# ifndef _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_VISIBILITY("default")
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
// The inline should be removed once PR32114 is resolved
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCPP_HIDDEN
# else
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# if __has_attribute(__type_visibility__)
# define _LIBCPP_TEMPLATE_VIS __attribute__((__type_visibility__("default")))
# else
# define _LIBCPP_TEMPLATE_VIS __attribute__((__visibility__("default")))
# endif
# else
# define _LIBCPP_TEMPLATE_VIS
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__)
# define _LIBCPP_ENUM_VIS __attribute__((__type_visibility__("default")))
# else
# define _LIBCPP_ENUM_VIS
# endif
# endif // defined(_LIBCPP_OBJECT_FORMAT_COFF)
# if __has_attribute(exclude_from_explicit_instantiation)
# define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION __attribute__((__exclude_from_explicit_instantiation__))
# else
// Try to approximate the effect of exclude_from_explicit_instantiation
// (which is that entities are not assumed to be provided by explicit
// template instantiations in the dylib) by always inlining those entities.
# define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE
# endif
// This macro marks a symbol as being hidden from libc++'s ABI. This is achieved
// on two levels:
// 1. The symbol is given hidden visibility, which ensures that users won't start exporting
// symbols from their dynamic library by means of using the libc++ headers. This ensures
// that those symbols stay private to the dynamic library in which they are defined.
//
// 2. The symbol is given an ABI tag that changes with each version of libc++. This ensures
// that no ODR violation can arise from mixing two TUs compiled with different versions
// of libc++ where we would have changed the definition of a symbol. If the symbols shared
// the same name, the ODR would require that their definitions be token-by-token equivalent,
// which basically prevents us from being able to make any change to any function in our
// headers. Using this ABI tag ensures that the symbol name is "bumped" artificially at
// each release, which lets us change the definition of these symbols at our leisure.
// Note that historically, this has been achieved in various ways, including force-inlining
// all functions or giving internal linkage to all functions. Both these (previous) solutions
// suffer from drawbacks that lead notably to code bloat.
//
// Note that we use _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION to ensure that we don't depend
// on _LIBCPP_HIDE_FROM_ABI methods of classes explicitly instantiated in the dynamic library.
//
// Also note that the _LIBCPP_HIDE_FROM_ABI_VIRTUAL macro should be used on virtual functions
// instead of _LIBCPP_HIDE_FROM_ABI. That macro does not use an ABI tag. Indeed, the mangled
// name of a virtual function is part of its ABI, since some architectures like arm64e can sign
// the virtual function pointer in the vtable based on the mangled name of the function. Since
// we use an ABI tag that changes with each released version, the mangled name of the virtual
// function would change, which is incorrect. Note that it doesn't make much sense to change
// the implementation of a virtual function in an ABI-incompatible way in the first place,
// since that would be an ABI break anyway. Hence, the lack of ABI tag should not be noticeable.
//
// TODO: We provide an escape hatch with _LIBCPP_NO_ABI_TAG for folks who want to avoid increasing
// the length of symbols with an ABI tag. In practice, we should remove the escape hatch and
// use compression mangling instead, see https://github.com/itanium-cxx-abi/cxx-abi/issues/70.
# ifndef _LIBCPP_NO_ABI_TAG
# define _LIBCPP_HIDE_FROM_ABI \
_LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION \
__attribute__((__abi_tag__(_LIBCPP_TOSTRING(_LIBCPP_VERSIONED_IDENTIFIER))))
# else
# define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
# endif
# define _LIBCPP_HIDE_FROM_ABI_VIRTUAL _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
// This macro provides a HIDE_FROM_ABI equivalent that can be applied to extern
// "C" function, as those lack mangling.
# define _LIBCPP_HIDE_FROM_ABI_C _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
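// Illustrative sketch (hypothetical helper, not part of this header): applying
// the macro to a header-only function, e.g.
//   _LIBCPP_HIDE_FROM_ABI int __sketch_helper(int __x) { return __x + 1; }
// gives it hidden visibility and (unless _LIBCPP_NO_ABI_TAG is defined) a
// mangled name carrying the versioned identifier (v160005 for this release),
// so TUs built against different libc++ versions cannot collide on it.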
# ifdef _LIBCPP_BUILDING_LIBRARY
# if _LIBCPP_ABI_VERSION > 1
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI
# else
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1
# endif
# else
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI
# endif
// Just so we can migrate to the new macros gradually.
# define _LIBCPP_INLINE_VISIBILITY _LIBCPP_HIDE_FROM_ABI
// Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect.
// clang-format off
# define _LIBCPP_BEGIN_NAMESPACE_STD namespace std { inline namespace _LIBCPP_ABI_NAMESPACE {
# define _LIBCPP_END_NAMESPACE_STD }}
# define _VSTD std
_LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \
_LIBCPP_BEGIN_NAMESPACE_STD inline namespace __fs { namespace filesystem {
# else
# define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \
_LIBCPP_BEGIN_NAMESPACE_STD namespace __fs { namespace filesystem {
# endif
# define _LIBCPP_END_NAMESPACE_FILESYSTEM _LIBCPP_END_NAMESPACE_STD }}
// clang-format on
# define _VSTD_FS std::__fs::filesystem
# if __has_attribute(__enable_if__)
# define _LIBCPP_PREFERRED_OVERLOAD __attribute__((__enable_if__(true, "")))
# endif
# ifndef __SIZEOF_INT128__
# define _LIBCPP_HAS_NO_INT128
# endif
# ifndef __cpp_consteval
# define _LIBCPP_CONSTEVAL _LIBCPP_CONSTEXPR
# else
# define _LIBCPP_CONSTEVAL consteval
# endif
# if __has_attribute(__malloc__)
# define _LIBCPP_NOALIAS __attribute__((__malloc__))
# else
# define _LIBCPP_NOALIAS
# endif
# if __has_attribute(__using_if_exists__)
# define _LIBCPP_USING_IF_EXISTS __attribute__((__using_if_exists__))
# else
# define _LIBCPP_USING_IF_EXISTS
# endif
# ifdef _LIBCPP_CXX03_LANG
# define _LIBCPP_DECLARE_STRONG_ENUM(x) \
struct _LIBCPP_TYPE_VIS x { \
enum __lx
// clang-format off
# define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) \
__lx __v_; \
_LIBCPP_INLINE_VISIBILITY x(__lx __v) : __v_(__v) {} \
_LIBCPP_INLINE_VISIBILITY explicit x(int __v) : __v_(static_cast<__lx>(__v)) {} \
_LIBCPP_INLINE_VISIBILITY operator int() const { return __v_; } \
};
// clang-format on
# else // _LIBCPP_CXX03_LANG
# define _LIBCPP_DECLARE_STRONG_ENUM(x) enum class _LIBCPP_ENUM_VIS x
# define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x)
# endif // _LIBCPP_CXX03_LANG
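// Illustrative usage sketch (hypothetical enumeration, not part of this
// header): the two macros are used as a matched pair so the same spelling
// works in both C++03 and newer dialects:
//   _LIBCPP_DECLARE_STRONG_ENUM(__sketch_color){__red, __green, __blue};
//   _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(__sketch_color)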
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || defined(__sun__) || \
defined(__NetBSD__)
# define _LIBCPP_LOCALE__L_EXTENSIONS 1
# endif
# ifdef __FreeBSD__
# define _DECLARE_C99_LDBL_MATH 1
# endif
// If we are getting operator new from the MSVC CRT, then allocation overloads
// for align_val_t were added in 19.12, aka VS 2017 version 15.3.
# if defined(_LIBCPP_MSVCRT) && defined(_MSC_VER) && _MSC_VER < 1912
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# elif defined(_LIBCPP_ABI_VCRUNTIME) && !defined(__cpp_aligned_new)
// We're deferring to Microsoft's STL to provide aligned new et al. We don't
// have it unless the language feature test macro is defined.
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# elif defined(__MVS__)
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# endif
# if defined(_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) || (!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606)
# define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION
# endif
// It is not yet possible to use aligned_alloc() on all Apple platforms since
// 10.15 was the first version to ship an implementation of aligned_alloc().
# if defined(__APPLE__)
# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \
__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500)
# define _LIBCPP_HAS_NO_C11_ALIGNED_ALLOC
# endif
# elif defined(__ANDROID__) && __ANDROID_API__ < 28
// Android only provides aligned_alloc when targeting API 28 or higher.
# define _LIBCPP_HAS_NO_C11_ALIGNED_ALLOC
# endif
# if defined(__APPLE__) || defined(__FreeBSD__)
# define _LIBCPP_HAS_DEFAULTRUNELOCALE
# endif
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun__)
# define _LIBCPP_WCTYPE_IS_MASK
# endif
# if _LIBCPP_STD_VER <= 17 || !defined(__cpp_char8_t)
# define _LIBCPP_HAS_NO_CHAR8_T
# endif
// Deprecation macros.
//
// Deprecations warnings are always enabled, except when users explicitly opt-out
// by defining _LIBCPP_DISABLE_DEPRECATION_WARNINGS.
# if !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
# if __has_attribute(deprecated)
# define _LIBCPP_DEPRECATED __attribute__((deprecated))
# define _LIBCPP_DEPRECATED_(m) __attribute__((deprecated(m)))
# elif _LIBCPP_STD_VER > 11
# define _LIBCPP_DEPRECATED [[deprecated]]
# define _LIBCPP_DEPRECATED_(m) [[deprecated(m)]]
# else
# define _LIBCPP_DEPRECATED
# define _LIBCPP_DEPRECATED_(m)
# endif
# else
# define _LIBCPP_DEPRECATED
# define _LIBCPP_DEPRECATED_(m)
# endif
# if !defined(_LIBCPP_CXX03_LANG)
# define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX11
# endif
# if _LIBCPP_STD_VER > 11
# define _LIBCPP_DEPRECATED_IN_CXX14 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX14
# endif
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX17
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX20
# endif
# if _LIBCPP_STD_VER >= 23
# define _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX23
# endif
# if !defined(_LIBCPP_HAS_NO_CHAR8_T)
# define _LIBCPP_DEPRECATED_WITH_CHAR8_T _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_WITH_CHAR8_T
# endif
// Macros to enter and leave a state where deprecation warnings are suppressed.
# if defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH \
_Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated\"") \
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
# define _LIBCPP_SUPPRESS_DEPRECATED_POP _Pragma("GCC diagnostic pop")
# else
# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH
# define _LIBCPP_SUPPRESS_DEPRECATED_POP
# endif
# if _LIBCPP_STD_VER <= 11
# define _LIBCPP_EXPLICIT_AFTER_CXX11
# else
# define _LIBCPP_EXPLICIT_AFTER_CXX11 explicit
# endif
# if _LIBCPP_STD_VER > 11
# define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr
# else
# define _LIBCPP_CONSTEXPR_SINCE_CXX14
# endif
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_CONSTEXPR_SINCE_CXX17 constexpr
# else
# define _LIBCPP_CONSTEXPR_SINCE_CXX17
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_CONSTEXPR_SINCE_CXX20 constexpr
# else
# define _LIBCPP_CONSTEXPR_SINCE_CXX20
# endif
# if _LIBCPP_STD_VER > 20
# define _LIBCPP_CONSTEXPR_SINCE_CXX23 constexpr
# else
# define _LIBCPP_CONSTEXPR_SINCE_CXX23
# endif
# if __has_cpp_attribute(nodiscard)
# define _LIBCPP_NODISCARD [[nodiscard]]
# else
// We can't use GCC's [[gnu::warn_unused_result]] and
// __attribute__((warn_unused_result)), because GCC does not silence them via
// a (void) cast.
# define _LIBCPP_NODISCARD
# endif
// _LIBCPP_NODISCARD_EXT may be used to apply [[nodiscard]] to entities not
// specified as such as an extension.
# if !defined(_LIBCPP_DISABLE_NODISCARD_EXT)
# define _LIBCPP_NODISCARD_EXT _LIBCPP_NODISCARD
# else
# define _LIBCPP_NODISCARD_EXT
# endif
# if _LIBCPP_STD_VER > 17 || !defined(_LIBCPP_DISABLE_NODISCARD_EXT)
# define _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_NODISCARD
# else
# define _LIBCPP_NODISCARD_AFTER_CXX17
# endif
# if __has_attribute(__no_destroy__)
# define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__))
# else
# define _LIBCPP_NO_DESTROY
# endif
# ifndef _LIBCPP_HAS_NO_ASAN
extern "C" _LIBCPP_FUNC_VIS void
__sanitizer_annotate_contiguous_container(const void*, const void*, const void*, const void*);
# endif
// Try to find out if RTTI is disabled.
# if !defined(__cpp_rtti) || __cpp_rtti < 199711L
# define _LIBCPP_HAS_NO_RTTI
# endif
# ifndef _LIBCPP_WEAK
# define _LIBCPP_WEAK __attribute__((__weak__))
# endif
// Thread API
// clang-format off
# if !defined(_LIBCPP_HAS_NO_THREADS) && \
!defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && \
!defined(_LIBCPP_HAS_THREAD_API_WIN32) && \
!defined(_LIBCPP_HAS_THREAD_API_EXTERNAL)
# if defined(__FreeBSD__) || \
defined(__wasi__) || \
defined(__NetBSD__) || \
defined(__OpenBSD__) || \
defined(__NuttX__) || \
defined(__linux__) || \
defined(__GNU__) || \
defined(__APPLE__) || \
defined(__sun__) || \
defined(__MVS__) || \
defined(_AIX) || \
defined(__EMSCRIPTEN__)
// clang-format on
# define _LIBCPP_HAS_THREAD_API_PTHREAD
# elif defined(__Fuchsia__)
// TODO(44575): Switch to C11 thread API when possible.
# define _LIBCPP_HAS_THREAD_API_PTHREAD
# elif defined(_LIBCPP_WIN32API)
# define _LIBCPP_HAS_THREAD_API_WIN32
# else
# error "No thread API"
# endif // _LIBCPP_HAS_THREAD_API
# endif // _LIBCPP_HAS_NO_THREADS
# if defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
# if defined(__ANDROID__) && __ANDROID_API__ >= 30
# define _LIBCPP_HAS_COND_CLOCKWAIT
# elif defined(_LIBCPP_GLIBC_PREREQ)
# if _LIBCPP_GLIBC_PREREQ(2, 30)
# define _LIBCPP_HAS_COND_CLOCKWAIT
# endif
# endif
# endif
# if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
# error _LIBCPP_HAS_THREAD_API_PTHREAD may only be defined when \
_LIBCPP_HAS_NO_THREADS is not defined.
# endif
# if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_EXTERNAL)
# error _LIBCPP_HAS_THREAD_API_EXTERNAL may not be defined when \
_LIBCPP_HAS_NO_THREADS is defined.
# endif
# if defined(_LIBCPP_HAS_NO_MONOTONIC_CLOCK) && !defined(_LIBCPP_HAS_NO_THREADS)
# error _LIBCPP_HAS_NO_MONOTONIC_CLOCK may only be defined when \
_LIBCPP_HAS_NO_THREADS is defined.
# endif
# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__)
# define __STDCPP_THREADS__ 1
# endif
// The glibc and Bionic implementations of pthreads implement
// pthread_mutex_destroy as a nop for regular mutexes. Additionally, Win32
// mutexes have no destroy mechanism.
//
// This optimization can't be performed on Apple platforms, where
// pthread_mutex_destroy can allow the kernel to release resources.
// See https://llvm.org/D64298 for details.
//
// TODO(EricWF): Enable this optimization on Bionic after speaking to their
// respective stakeholders.
// clang-format off
# if (defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) || \
(defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || \
defined(_LIBCPP_HAS_THREAD_API_WIN32)
// clang-format on
# define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION
# endif
// Destroying a condvar is a nop on Windows.
//
// This optimization can't be performed on Apple platforms, where
// pthread_cond_destroy can allow the kernel to release resources.
// See https://llvm.org/D64298 for details.
//
// TODO(EricWF): This is potentially true for some pthread implementations
// as well.
# if (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || defined(_LIBCPP_HAS_THREAD_API_WIN32)
# define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION
# endif
// Some systems do not provide gets() in their C library, for security reasons.
# if defined(_LIBCPP_MSVCRT) || (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) || defined(__OpenBSD__)
# define _LIBCPP_C_HAS_NO_GETS
# endif
# if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \
defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__)
# define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
# endif
# if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic)
# define _LIBCPP_HAS_C_ATOMIC_IMP
# elif defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_HAS_GCC_ATOMIC_IMP
# endif
# if !defined(_LIBCPP_HAS_C_ATOMIC_IMP) && !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \
!defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP)
# define _LIBCPP_HAS_NO_ATOMIC_HEADER
# else
# ifndef _LIBCPP_ATOMIC_FLAG_TYPE
# define _LIBCPP_ATOMIC_FLAG_TYPE bool
# endif
# ifdef _LIBCPP_FREESTANDING
# define _LIBCPP_ATOMIC_ONLY_USE_BUILTINS
# endif
# endif
# ifndef _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
# define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
# endif
# if defined(_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS)
# if defined(__clang__) && __has_attribute(acquire_capability)
// Work around the attribute handling in clang. When both __declspec and
// __attribute__ are present, the processing goes awry preventing the definition
// of the types. In MinGW mode, __declspec evaluates to __attribute__, and thus
// combining the two does work.
# if !defined(_MSC_VER)
# define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS
# endif
# endif
# endif
# ifdef _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS
# define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) __attribute__((x))
# else
# define _LIBCPP_THREAD_SAFETY_ANNOTATION(x)
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_CONSTINIT constinit
# elif __has_attribute(__require_constant_initialization__)
# define _LIBCPP_CONSTINIT __attribute__((__require_constant_initialization__))
# else
# define _LIBCPP_CONSTINIT
# endif
# if __has_attribute(__diagnose_if__) && !defined(_LIBCPP_DISABLE_ADDITIONAL_DIAGNOSTICS)
# define _LIBCPP_DIAGNOSE_WARNING(...) __attribute__((__diagnose_if__(__VA_ARGS__, "warning")))
# else
# define _LIBCPP_DIAGNOSE_WARNING(...)
# endif
// Use a function like macro to imply that it must be followed by a semicolon
# if __has_cpp_attribute(fallthrough)
# define _LIBCPP_FALLTHROUGH() [[fallthrough]]
# elif __has_attribute(__fallthrough__)
# define _LIBCPP_FALLTHROUGH() __attribute__((__fallthrough__))
# else
# define _LIBCPP_FALLTHROUGH() ((void)0)
# endif
# if __has_cpp_attribute(_Clang::__lifetimebound__)
# define _LIBCPP_LIFETIMEBOUND [[_Clang::__lifetimebound__]]
# else
# define _LIBCPP_LIFETIMEBOUND
# endif
# if __has_attribute(__nodebug__)
# define _LIBCPP_NODEBUG __attribute__((__nodebug__))
# else
# define _LIBCPP_NODEBUG
# endif
# if __has_attribute(__standalone_debug__)
# define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__))
# else
# define _LIBCPP_STANDALONE_DEBUG
# endif
# if __has_attribute(__preferred_name__)
# define _LIBCPP_PREFERRED_NAME(x) __attribute__((__preferred_name__(x)))
# else
# define _LIBCPP_PREFERRED_NAME(x)
# endif
// We often repeat things just for handling wide characters in the library.
// When wide characters are disabled, it can be useful to have a quick way of
// disabling it without having to resort to #if-#endif, which has a larger
// impact on readability.
# if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)
# define _LIBCPP_IF_WIDE_CHARACTERS(...)
# else
# define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__
# endif
# if defined(_LIBCPP_ABI_MICROSOFT) && __has_declspec_attribute(empty_bases)
# define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases)
# else
# define _LIBCPP_DECLSPEC_EMPTY_BASES
# endif
# if defined(_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES)
# define _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR
# define _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS
# define _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE
# define _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS
# define _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION
# endif // _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES
# if defined(_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES)
# define _LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS
# define _LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_VOID_SPECIALIZATION
# define _LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS
# define _LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS
# define _LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR
# define _LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS
# endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES
# define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")")
# define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")")
# ifndef _LIBCPP_NO_AUTO_LINK
# if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY)
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# pragma comment(lib, "c++.lib")
# else
# pragma comment(lib, "libc++.lib")
# endif
# endif // defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY)
# endif // _LIBCPP_NO_AUTO_LINK
// Configures the fopen close-on-exec mode character, if any. This string will
// be appended to any mode string used by fstream for fopen/fdopen.
//
// Not all platforms support this, but it helps avoid fd-leaks on platforms that
// do.
# if defined(__BIONIC__)
# define _LIBCPP_FOPEN_CLOEXEC_MODE "e"
# else
# define _LIBCPP_FOPEN_CLOEXEC_MODE
# endif
// Support for _FILE_OFFSET_BITS=64 landed gradually in Android, so the full set
// of functions used in cstdio may not be available for low API levels when
// using 64-bit file offsets on LP32.
# if defined(__BIONIC__) && defined(__USE_FILE_OFFSET64) && __ANDROID_API__ < 24
# define _LIBCPP_HAS_NO_FGETPOS_FSETPOS
# endif
# if __has_attribute(__init_priority__)
# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((__init_priority__(100)))
# else
# define _LIBCPP_INIT_PRIORITY_MAX
# endif
# if __has_attribute(__format__)
// The attribute uses 1-based indices for ordinary and static member functions.
// The attribute uses 2-based indices for non-static member functions.
# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \
__attribute__((__format__(archetype, format_string_index, first_format_arg_index)))
# else
# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */
# endif
# if __has_cpp_attribute(msvc::no_unique_address)
// MSVC implements [[no_unique_address]] as a silent no-op currently.
// (If/when MSVC breaks its C++ ABI, it will be changed to work as intended.)
// However, MSVC implements [[msvc::no_unique_address]] which does what
// [[no_unique_address]] is supposed to do, in general.
// Clang-cl does not yet (14.0) implement either [[no_unique_address]] or
// [[msvc::no_unique_address]]. If/when it does implement
// [[msvc::no_unique_address]], that attribute should be preferred.
# define _LIBCPP_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
# elif __has_cpp_attribute(no_unique_address)
# define _LIBCPP_NO_UNIQUE_ADDRESS [[no_unique_address]]
# else
# define _LIBCPP_NO_UNIQUE_ADDRESS /* nothing */
// Note that this can be replaced by #error as soon as clang-cl
// implements msvc::no_unique_address, since there should be no C++20
// compiler that doesn't support one of the two attributes at that point.
// We generally don't want to use this macro outside of C++20-only code,
// because using it conditionally in one language version only would make
// the ABI inconsistent.
# endif
# ifdef _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
# define _LIBCPP_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(clang diagnostic ignored str))
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str)
# elif defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
# define _LIBCPP_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str)
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(GCC diagnostic ignored str))
# else
# define _LIBCPP_DIAGNOSTIC_PUSH
# define _LIBCPP_DIAGNOSTIC_POP
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str)
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str)
# endif
# if defined(_AIX) && !defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_PACKED_BYTE_FOR_AIX _Pragma("pack(1)")
# define _LIBCPP_PACKED_BYTE_FOR_AIX_END _Pragma("pack(pop)")
# else
# define _LIBCPP_PACKED_BYTE_FOR_AIX /* empty */
# define _LIBCPP_PACKED_BYTE_FOR_AIX_END /* empty */
# endif
# if __has_attribute(__packed__)
# define _LIBCPP_PACKED __attribute__((__packed__))
# else
# define _LIBCPP_PACKED
# endif
// c8rtomb() and mbrtoc8() were added in C++20 and C23. Support for these
// functions is gradually being added to existing C libraries. The conditions
// below check for known C library versions and conditions under which these
// functions are declared by the C library.
# define _LIBCPP_HAS_NO_C8RTOMB_MBRTOC8
// GNU libc 2.36 and newer declare c8rtomb() and mbrtoc8() in C++ modes if
// __cpp_char8_t is defined or if C2X extensions are enabled. Determining
// the latter depends on internal GNU libc details that are not appropriate
// to depend on here, so any declarations present when __cpp_char8_t is not
// defined are ignored.
# if defined(_LIBCPP_GLIBC_PREREQ)
# if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t)
# undef _LIBCPP_HAS_NO_C8RTOMB_MBRTOC8
# endif
# endif
// There are a handful of public standard library types that are intended to
// support CTAD but don't need any explicit deduction guides to do so. This
// macro is used to mark them as such, which suppresses the
// '-Wctad-maybe-unsupported' compiler warning when CTAD is used in user code
// with these classes.
# if _LIBCPP_STD_VER >= 17
# define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) \
template <class ..._Tag> \
_ClassName(typename _Tag::__allow_ctad...) -> _ClassName<_Tag...>
# else
# define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "")
# endif
// TODO(varconst): currently, there are bugs in Clang's intrinsics when handling Objective-C++ `id`, so don't use
// compiler intrinsics in the Objective-C++ mode.
# ifdef __OBJC__
# define _LIBCPP_WORKAROUND_OBJCXX_COMPILER_INTRINSICS
# endif
#endif // __cplusplus
#endif // _LIBCPP___CONFIG
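// Illustrative sketch (not part of this patch): downstream code can detect the
// 16.0.5 headers via the version macro bumped in this change. Assumes a
// libc++ toolchain where including <version> pulls in <__config>.
#include <version>
#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 160005
// building against libc++ 16.0.5 or newer
#endif
int main() { return 0; }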
diff --git a/contrib/llvm-project/lld/docs/WebAssembly.rst b/contrib/llvm-project/lld/docs/WebAssembly.rst
index c40d4b322080..dad3177e2c7d 100644
--- a/contrib/llvm-project/lld/docs/WebAssembly.rst
+++ b/contrib/llvm-project/lld/docs/WebAssembly.rst
@@ -1,225 +1,258 @@
WebAssembly lld port
====================
The WebAssembly version of lld takes WebAssembly binaries as inputs and produces
a WebAssembly binary as its output. For the most part it tries to mimic the
behaviour of traditional ELF linkers and specifically the ELF lld port. Where
possible the command line flags and the semantics should be the same.
Object file format
------------------
The WebAssembly object file format used by LLVM and LLD is specified as part of
the WebAssembly tool conventions on linking_.
This is the object format that LLVM will produce when run with the
``wasm32-unknown-unknown`` target.
Usage
-----
The WebAssembly version of lld is installed as **wasm-ld**. It shares many
common linker flags with **ld.lld** but also includes several
WebAssembly-specific options:
.. option:: --no-entry
Don't search for the entry point symbol (by default ``_start``).
.. option:: --export-table
Export the function table to the environment.
.. option:: --import-table
Import the function table from the environment.
.. option:: --export-all
Export all symbols (normally combined with ``--no-gc-sections``).
Note that this will not export linker-generated mutable globals unless
the resulting binary already uses the 'mutable-globals' feature,
since that would otherwise create an invalid binary.
.. option:: --export-dynamic
When building an executable, export any non-hidden symbols. By default only
the entry point and any symbols marked as exports (either via the command line
or via the `export-name` source attribute) are exported.
.. option:: --global-base=<value>
Address at which to place global data.
.. option:: --no-merge-data-segments
Disable merging of data segments.
.. option:: --stack-first
Place stack at start of linear memory rather than after data.
.. option:: --compress-relocations
Relocation targets in the code section are 5 bytes wide in order to
potentially accommodate the largest LEB128 value. This option will cause the
linker to shrink the code section to remove any padding from the final
output. However, because it affects code offsets, this option is not
compatible with outputting debug information.
.. option:: --allow-undefined
Allow undefined symbols in linked binary. This is the legacy
flag which corresponds to ``--unresolve-symbols=ignore`` +
``--import-undefined``.
+.. option:: --allow-undefined-file=<filename>
+
+ Like ``--allow-undefined``, but the file specifies a flat list of
+ symbols, one per line, which are allowed to be undefined.
+
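+ For example, given a file ``undefined_syms.txt`` (hypothetical name)
+ containing::
+
+   foo
+   bar
+
+ linking with ``wasm-ld --allow-undefined-file=undefined_syms.txt ...`` allows
+ ``foo`` and ``bar`` to remain undefined in the output.
+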
.. option:: --unresolved-symbols=<method>
This is a more fully featured version of ``--allow-undefined``.
The semantics of the different methods are as follows:
report-all:
Report all unresolved symbols. This is the default. Normally the linker
will generate an error message for each reported unresolved symbol but the
option ``--warn-unresolved-symbols`` can change this to a warning.
ignore-all:
Resolve all undefined symbols to zero. For data and function addresses
this is trivial. For direct function calls, the linker will generate a
trapping stub function in place of the undefined function.
import-dynamic:
Undefined symbols generate WebAssembly imports, including undefined data
symbols. This is somewhat similar to the ``--import-undefined`` option but
works for all symbol types. This option puts limitations on the type of
relocations that are allowed for imported data symbols. Relocations that
require absolute data addresses (i.e. all ``R_WASM_MEMORY_ADDR_I32``) will
generate an error if they cannot be resolved statically. For clang/llvm
this means inputs should be compiled with ``-fPIC`` (i.e. the ``pic`` or
``dynamic-no-pic`` relocation models). This option is useful for linking
binaries that are themselves static (non-relocatable) but whose undefined
symbols are resolved by a dynamic linker. Since the dynamic linking API is
experimental, this option currently requires `--experimental-pic` to also
be specified.
.. option:: --import-memory
Import memory from the environment.
.. option:: --import-undefined
Generate WebAssembly imports for undefined symbols, where possible. For
example, for function symbols this is always possible, but in general this
is not possible for undefined data symbols. Undefined data symbols will
still be reported as normal (in accordance with ``--unresolved-symbols``).
.. option:: --initial-memory=<value>
Initial size of the linear memory. Default: static data size.
.. option:: --max-memory=<value>
Maximum size of the linear memory. Default: unlimited.
By default the function table is neither imported nor exported, but defined
for internal use only.
Behaviour
---------
In general, where possible, the WebAssembly linker attempts to emulate the
behaviour of a traditional ELF linker, and in particular the ELF port of lld.
For more specific details on how this is achieved see the tool conventions on
linking_.
Function Signatures
~~~~~~~~~~~~~~~~~~~
One way in which the WebAssembly linker differs from traditional native linkers
is that function signature checking is strict in WebAssembly. It is a
validation error for a module to contain a call site that doesn't agree with
the target signature. Even though this is undefined behaviour in C/C++, it is not
uncommon to find this in real-world C/C++ programs. For example, a call site
in one compilation unit may call a function defined in another compilation
unit but pass too many arguments.
In order not to generate such invalid modules, lld has two modes of handling
such mismatches: it can simply error out, or it can create stub functions that
will trap at runtime (functions that contain only an ``unreachable``
instruction) and use these stub functions at the otherwise invalid call sites.
The default behaviour is to generate these stub functions and to produce
a warning. The ``--fatal-warnings`` flag can be used to disable this behaviour
and error out if mismatches are found.
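As a hedged illustration of such a mismatch (the function and file names here
are hypothetical)::

  // a.c: the defining compilation unit
  int foo(int x) { return x; }

  // b.c: a mismatched declaration; the call below is undefined behaviour
  // in C/C++ and a validation error in WebAssembly, so by default lld
  // routes it to a trapping stub function and warns.
  int foo(int x, int y);
  int call_foo(void) { return foo(1, 2); }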
Exports
~~~~~~~
When building a shared library any symbols marked as ``visibility=default`` will
be exported.
When building an executable, only the entry point (``_start``) and symbols with
the ``WASM_SYMBOL_EXPORTED`` flag are exported by default. In LLVM the
``WASM_SYMBOL_EXPORTED`` flag is set by the ``wasm-export-name`` attribute which
in turn can be set using the ``__attribute__((export_name))`` clang attribute.
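For example, a minimal sketch of marking a function for export (the function
name here is hypothetical)::

  // Exported from the module under the wasm export name "my_func".
  __attribute__((export_name("my_func")))
  int my_func(int x) {
    return x * 2;
  }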
In addition, symbols can be exported via the linker command line using
``--export`` (which will error if the symbol is not found) or
``--export-if-defined`` (which will not).
Finally, just like with the native ELF linker, the ``--export-dynamic`` flag
can be
used to export symbols in the executable which are marked as
``visibility=default``.
Imports
~~~~~~~
By default no undefined symbols are allowed in the final binary. The flag
``--allow-undefined`` results in a WebAssembly import being defined for each
undefined symbol. It is then up to the runtime to provide such symbols.
+``--allow-undefined-file`` behaves in the same way, but only for a
+specified list of symbols.
Alternatively, symbols can be marked in the source code with the
``import_name`` and/or ``import_module`` clang attributes, which signal that
they are expected to be undefined at static link time.
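For example, a minimal sketch of declaring an imported function (the module
and function names here are hypothetical)::

  // Resolved at instantiation time to the import "env"."host_log".
  __attribute__((import_module("env"), import_name("host_log")))
  void host_log(int level);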
+Stub Libraries
+~~~~~~~~~~~~~~
+
+Another way to specify imports and exports is via a "stub library". This
+feature is inspired by the ELF stub objects which are supported by the Solaris
+linker. Stub libraries are text files that can be passed as normal linker
+inputs, similar to how linker scripts can be passed to the ELF linker. The stub
+library is a stand-in for a set of symbols that will be available at runtime,
+but doesn't contain any actual code or data. Instead it contains just a list of
+symbols, one per line. Each symbol can specify zero or more dependencies.
+These dependencies are symbols that must be defined, and exported, by the output
+module if the symbol in question is imported/required by the output module.
+
+For example, imagine the runtime provides an external symbol ``foo`` that
+depends on ``malloc`` and ``free``. This can be expressed simply as::
+
+ #STUB
+ foo: malloc,free
+
+Here we are saying that ``foo`` is allowed to be imported (undefined) but that
+if it is imported, then the output module must also export ``malloc`` and
+``free`` to the runtime. If ``foo`` is imported (undefined), but the output
+module does not define ``malloc`` and ``free`` then the link will fail.
+
+Stub libraries must begin with ``#STUB`` on a line by itself.
+
Garbage Collection
~~~~~~~~~~~~~~~~~~
Since WebAssembly is designed with size in mind, the linker defaults to
``--gc-sections`` which means that all unused functions and data segments will
be stripped from the binary.
The symbols which are preserved by default are:
- The entry point (by default ``_start``).
- Any symbol which is to be exported.
- Any symbol transitively referenced by the above.
Weak Undefined Functions
~~~~~~~~~~~~~~~~~~~~~~~~
On native platforms, calls to weak undefined functions end up as calls to the
null function pointer. With WebAssembly, direct calls must reference a defined
function (with the correct signature). In order to handle this case the linker
will generate a stub function containing only the ``unreachable`` instruction
and use this stub for any direct references to an undefined weak function.
For example, a runtime call to a weak undefined function ``foo`` will end up
trapping on ``unreachable`` inside a linker-generated function called
``undefined:foo``.
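A hedged sketch of the source pattern involved (the function name here is
hypothetical)::

  // Weakly declared and possibly left undefined at static link time.
  __attribute__((weak)) void foo(void);

  void maybe_call_foo(void) {
    // Guarding on the address avoids the trapping stub; an unguarded
    // direct call to an undefined weak foo would hit undefined:foo.
    if (foo)
      foo();
  }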
Missing features
----------------
- Merging of data sections similar to ``SHF_MERGE`` in the ELF world is not
supported.
- No support for creating shared libraries. The spec for shared libraries in
WebAssembly is still in flux:
https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md
.. _linking: https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
index 953e15e358f1..8ac6e7dac63e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -1,994 +1,1003 @@
//===- llvm/Analysis/AliasAnalysis.h - Alias Analysis Interface -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the generic AliasAnalysis interface, which is used as the
// common interface used by all clients of alias analysis information, and
// implemented by all alias analysis implementations. Mod/Ref information is
// also captured by this interface.
//
// Implementations of this interface must implement the various virtual methods,
// which automatically provides functionality for the entire suite of client
// APIs.
//
// This API identifies memory regions with the MemoryLocation class. The pointer
// component specifies the base memory address of the region. The Size specifies
// the maximum size (in address units) of the memory region, or
// MemoryLocation::UnknownSize if the size is not known. The TBAA tag
// identifies the "type" of the memory reference; see the
// TypeBasedAliasAnalysis class for details.
//
// Some non-obvious details include:
// - Pointers that point to two completely different objects in memory never
// alias, regardless of the value of the Size component.
// - NoAlias doesn't imply unequal pointers. The most obvious example of this
// is two pointers to constant memory. Even if they are equal, constant
// memory is never stored to, so there will never be any dependencies.
// In this and other situations, the pointers may be both NoAlias and
// MustAlias at the same time. The current API can only return one result,
// though this is rarely a problem in practice.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_ALIASANALYSIS_H
#define LLVM_ANALYSIS_ALIASANALYSIS_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/ModRef.h"
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <vector>
namespace llvm {
class AnalysisUsage;
class AtomicCmpXchgInst;
class BasicAAResult;
class BasicBlock;
class CatchPadInst;
class CatchReturnInst;
class DominatorTree;
class FenceInst;
class Function;
class LoopInfo;
class PreservedAnalyses;
class TargetLibraryInfo;
class Value;
template <typename> class SmallPtrSetImpl;
/// The possible results of an alias query.
///
/// These results are always computed between two MemoryLocation objects as
/// a query to some alias analysis.
///
/// Note that these are unscoped enumerations because we would like to support
/// implicitly testing a result for the existence of any possible aliasing with
/// a conversion to bool, but an "enum class" doesn't support this. The
/// canonical names from the literature are suffixed and unique anyways, and so
/// they serve as global constants in LLVM for these results.
///
/// See docs/AliasAnalysis.html for more information on the specific meanings
/// of these values.
class AliasResult {
private:
static const int OffsetBits = 23;
static const int AliasBits = 8;
static_assert(AliasBits + 1 + OffsetBits <= 32,
"AliasResult size is intended to be 4 bytes!");
unsigned int Alias : AliasBits;
unsigned int HasOffset : 1;
signed int Offset : OffsetBits;
public:
enum Kind : uint8_t {
/// The two locations do not alias at all.
///
/// This value is arranged to convert to false, while all other values
/// convert to true. This allows a boolean context to convert the result to
/// a binary flag indicating whether there is the possibility of aliasing.
NoAlias = 0,
/// The two locations may or may not alias. This is the least precise
/// result.
MayAlias,
/// The two locations alias, but only due to a partial overlap.
PartialAlias,
/// The two locations precisely alias each other.
MustAlias,
};
static_assert(MustAlias < (1 << AliasBits),
"Not enough bit field size for the enum!");
explicit AliasResult() = delete;
constexpr AliasResult(const Kind &Alias)
: Alias(Alias), HasOffset(false), Offset(0) {}
operator Kind() const { return static_cast<Kind>(Alias); }
+ bool operator==(const AliasResult &Other) const {
+ return Alias == Other.Alias && HasOffset == Other.HasOffset &&
+ Offset == Other.Offset;
+ }
+ bool operator!=(const AliasResult &Other) const { return !(*this == Other); }
+
+ bool operator==(Kind K) const { return Alias == K; }
+ bool operator!=(Kind K) const { return !(*this == K); }
+
constexpr bool hasOffset() const { return HasOffset; }
constexpr int32_t getOffset() const {
assert(HasOffset && "No offset!");
return Offset;
}
void setOffset(int32_t NewOffset) {
if (isInt<OffsetBits>(NewOffset)) {
HasOffset = true;
Offset = NewOffset;
}
}
/// Helper for processing AliasResult for swapped memory location pairs.
void swap(bool DoSwap = true) {
if (DoSwap && hasOffset())
setOffset(-getOffset());
}
};
static_assert(sizeof(AliasResult) == 4,
"AliasResult size is intended to be 4 bytes!");
/// << operator for AliasResult.
raw_ostream &operator<<(raw_ostream &OS, AliasResult AR);
/// Virtual base class for providers of capture information.
struct CaptureInfo {
virtual ~CaptureInfo() = 0;
virtual bool isNotCapturedBeforeOrAt(const Value *Object,
const Instruction *I) = 0;
};
/// Context-free CaptureInfo provider, which computes and caches whether an
/// object is captured in the function at all, but does not distinguish whether
/// it was captured before or after the context instruction.
class SimpleCaptureInfo final : public CaptureInfo {
SmallDenseMap<const Value *, bool, 8> IsCapturedCache;
public:
bool isNotCapturedBeforeOrAt(const Value *Object,
const Instruction *I) override;
};
/// Context-sensitive CaptureInfo provider, which computes and caches the
/// earliest common dominator closure of all captures. It provides a good
/// approximation to a precise "captures before" analysis.
class EarliestEscapeInfo final : public CaptureInfo {
DominatorTree &DT;
const LoopInfo &LI;
/// Map from identified local object to an instruction before which it does
/// not escape, or nullptr if it never escapes. The "earliest" instruction
/// may be a conservative approximation, e.g. the first instruction in the
/// function is always a legal choice.
DenseMap<const Value *, Instruction *> EarliestEscapes;
/// Reverse map from instruction to the objects it is the earliest escape for.
/// This is used for cache invalidation purposes.
DenseMap<Instruction *, TinyPtrVector<const Value *>> Inst2Obj;
const SmallPtrSetImpl<const Value *> &EphValues;
public:
EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI,
const SmallPtrSetImpl<const Value *> &EphValues)
: DT(DT), LI(LI), EphValues(EphValues) {}
bool isNotCapturedBeforeOrAt(const Value *Object,
const Instruction *I) override;
void removeInstruction(Instruction *I);
};
/// Cache key for BasicAA results. It only includes the pointer and size from
/// MemoryLocation, as BasicAA is AATags independent. Additionally, it includes
/// the value of MayBeCrossIteration, which may affect BasicAA results.
struct AACacheLoc {
using PtrTy = PointerIntPair<const Value *, 1, bool>;
PtrTy Ptr;
LocationSize Size;
AACacheLoc(PtrTy Ptr, LocationSize Size) : Ptr(Ptr), Size(Size) {}
AACacheLoc(const Value *Ptr, LocationSize Size, bool MayBeCrossIteration)
: Ptr(Ptr, MayBeCrossIteration), Size(Size) {}
};
template <> struct DenseMapInfo<AACacheLoc> {
static inline AACacheLoc getEmptyKey() {
return {DenseMapInfo<AACacheLoc::PtrTy>::getEmptyKey(),
DenseMapInfo<LocationSize>::getEmptyKey()};
}
static inline AACacheLoc getTombstoneKey() {
return {DenseMapInfo<AACacheLoc::PtrTy>::getTombstoneKey(),
DenseMapInfo<LocationSize>::getTombstoneKey()};
}
static unsigned getHashValue(const AACacheLoc &Val) {
return DenseMapInfo<AACacheLoc::PtrTy>::getHashValue(Val.Ptr) ^
DenseMapInfo<LocationSize>::getHashValue(Val.Size);
}
static bool isEqual(const AACacheLoc &LHS, const AACacheLoc &RHS) {
return LHS.Ptr == RHS.Ptr && LHS.Size == RHS.Size;
}
};
class AAResults;
/// This class stores info we want to provide to or retain within an alias
/// query. By default, the root query is stateless and starts with a freshly
/// constructed info object. Specific alias analyses can use this query info to
/// store per-query state that is important for recursive or nested queries to
/// avoid recomputing. To enable preserving this state across multiple queries
/// where safe (due to the IR not changing), use a `BatchAAResults` wrapper.
/// The information stored in an `AAQueryInfo` is currently limited to the
/// caches used by BasicAA, but can further be extended to fit other AA needs.
class AAQueryInfo {
public:
using LocPair = std::pair<AACacheLoc, AACacheLoc>;
struct CacheEntry {
AliasResult Result;
/// Number of times a NoAlias assumption has been used.
/// 0 for assumptions that have not been used, -1 for definitive results.
int NumAssumptionUses;
/// Whether this is a definitive (non-assumption) result.
bool isDefinitive() const { return NumAssumptionUses < 0; }
};
// Alias analysis result aggregation with which this query is performed.
// Can be used to perform recursive queries.
AAResults &AAR;
using AliasCacheT = SmallDenseMap<LocPair, CacheEntry, 8>;
AliasCacheT AliasCache;
CaptureInfo *CI;
/// Query depth used to distinguish recursive queries.
unsigned Depth = 0;
/// How many active NoAlias assumption uses there are.
int NumAssumptionUses = 0;
/// Location pairs for which an assumption based result is currently stored.
/// Used to remove all potentially incorrect results from the cache if an
/// assumption is disproven.
SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults;
/// Tracks whether the accesses may be on different cycle iterations.
///
/// When interpret "Value" pointer equality as value equality we need to make
/// sure that the "Value" is not part of a cycle. Otherwise, two uses could
/// come from different "iterations" of a cycle and see different values for
/// the same "Value" pointer.
///
/// The following example shows the problem:
/// %p = phi(%alloca1, %addr2)
/// %l = load %ptr
/// %addr1 = gep, %alloca2, 0, %l
/// %addr2 = gep %alloca2, 0, (%l + 1)
/// alias(%p, %addr1) -> MayAlias !
/// store %l, ...
bool MayBeCrossIteration = false;
AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {}
};
/// AAQueryInfo that uses SimpleCaptureInfo.
class SimpleAAQueryInfo : public AAQueryInfo {
SimpleCaptureInfo CI;
public:
SimpleAAQueryInfo(AAResults &AAR) : AAQueryInfo(AAR, &CI) {}
};
class BatchAAResults;
class AAResults {
public:
// Make these results default constructible and movable. We have to spell
// these out because MSVC won't synthesize them.
AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {}
AAResults(AAResults &&Arg);
~AAResults();
/// Register a specific AA result.
template <typename AAResultT> void addAAResult(AAResultT &AAResult) {
// FIXME: We should use a much lighter weight system than the usual
// polymorphic pattern because we don't own AAResult. It should
// ideally involve two pointers and no separate allocation.
AAs.emplace_back(new Model<AAResultT>(AAResult, *this));
}
/// Register a function analysis ID that the results aggregation depends on.
///
/// This is used in the new pass manager to implement the invalidation logic
/// where we must invalidate the results aggregation if any of our component
/// analyses become invalid.
void addAADependencyID(AnalysisKey *ID) { AADeps.push_back(ID); }
/// Handle invalidation events in the new pass manager.
///
/// The aggregation is invalidated if any of the underlying analyses is
/// invalidated.
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);
//===--------------------------------------------------------------------===//
/// \name Alias Queries
/// @{
/// The main low level interface to the alias analysis implementation.
/// Returns an AliasResult indicating whether the two pointers are aliased to
/// each other. This is the interface that must be implemented by specific
/// alias analysis implementations.
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
/// A convenience wrapper around the primary \c alias interface.
AliasResult alias(const Value *V1, LocationSize V1Size, const Value *V2,
LocationSize V2Size) {
return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size));
}
/// A convenience wrapper around the primary \c alias interface.
AliasResult alias(const Value *V1, const Value *V2) {
return alias(MemoryLocation::getBeforeOrAfter(V1),
MemoryLocation::getBeforeOrAfter(V2));
}
/// A trivial helper function to check to see if the specified pointers are
/// no-alias.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return alias(LocA, LocB) == AliasResult::NoAlias;
}
/// A convenience wrapper around the \c isNoAlias helper interface.
bool isNoAlias(const Value *V1, LocationSize V1Size, const Value *V2,
LocationSize V2Size) {
return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size));
}
/// A convenience wrapper around the \c isNoAlias helper interface.
bool isNoAlias(const Value *V1, const Value *V2) {
return isNoAlias(MemoryLocation::getBeforeOrAfter(V1),
MemoryLocation::getBeforeOrAfter(V2));
}
/// A trivial helper function to check to see if the specified pointers are
/// must-alias.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return alias(LocA, LocB) == AliasResult::MustAlias;
}
/// A convenience wrapper around the \c isMustAlias helper interface.
bool isMustAlias(const Value *V1, const Value *V2) {
return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) ==
AliasResult::MustAlias;
}
/// Checks whether the given location points to constant memory, or if
/// \p OrLocal is true whether it points to a local alloca.
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) {
return isNoModRef(getModRefInfoMask(Loc, OrLocal));
}
/// A convenience wrapper around the primary \c pointsToConstantMemory
/// interface.
bool pointsToConstantMemory(const Value *P, bool OrLocal = false) {
return pointsToConstantMemory(MemoryLocation::getBeforeOrAfter(P), OrLocal);
}
/// @}
//===--------------------------------------------------------------------===//
/// \name Simple mod/ref information
/// @{
/// Returns a bitmask that should be unconditionally applied to the ModRef
/// info of a memory location. This allows us to eliminate Mod and/or Ref
/// from the ModRef info based on the knowledge that the memory location
/// points to constant and/or locally-invariant memory.
///
/// If IgnoreLocals is true, then this method returns NoModRef for memory
/// that points to a local alloca.
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc,
bool IgnoreLocals = false);
/// A convenience wrapper around the primary \c getModRefInfoMask
/// interface.
ModRefInfo getModRefInfoMask(const Value *P, bool IgnoreLocals = false) {
return getModRefInfoMask(MemoryLocation::getBeforeOrAfter(P), IgnoreLocals);
}
/// Get the ModRef info associated with a pointer argument of a call. The
/// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
/// that these bits do not necessarily account for the overall behavior of
/// the function, but rather only provide additional per-argument
/// information.
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
/// Return the behavior of the given call site.
MemoryEffects getMemoryEffects(const CallBase *Call);
/// Return the behavior when calling the given function.
MemoryEffects getMemoryEffects(const Function *F);
/// Checks if the specified call is known to never read or write memory.
///
/// Note that if the call only reads from known-constant memory, it is also
/// legal to return true. Also, calls that unwind the stack are legal for
/// this predicate.
///
/// Many optimizations (such as CSE and LICM) can be performed on such calls
/// without worrying about aliasing properties, and many calls have this
/// property (e.g. calls to 'sin' and 'cos').
///
/// This property corresponds to the GCC 'const' attribute.
bool doesNotAccessMemory(const CallBase *Call) {
return getMemoryEffects(Call).doesNotAccessMemory();
}
/// Checks if the specified function is known to never read or write memory.
///
/// Note that if the function only reads from known-constant memory, it is
/// also legal to return true. Also, functions that unwind the stack are legal
/// for this predicate.
///
/// Many optimizations (such as CSE and LICM) can be performed on such calls
/// to such functions without worrying about aliasing properties, and many
/// functions have this property (e.g. 'sin' and 'cos').
///
/// This property corresponds to the GCC 'const' attribute.
bool doesNotAccessMemory(const Function *F) {
return getMemoryEffects(F).doesNotAccessMemory();
}
/// Checks if the specified call is known to only read from non-volatile
/// memory (or not access memory at all).
///
/// Calls that unwind the stack are legal for this predicate.
///
/// This property allows many common optimizations to be performed in the
/// absence of interfering store instructions, such as CSE of strlen calls.
///
/// This property corresponds to the GCC 'pure' attribute.
bool onlyReadsMemory(const CallBase *Call) {
return getMemoryEffects(Call).onlyReadsMemory();
}
/// Checks if the specified function is known to only read from non-volatile
/// memory (or not access memory at all).
///
/// Functions that unwind the stack are legal for this predicate.
///
/// This property allows many common optimizations to be performed in the
/// absence of interfering store instructions, such as CSE of strlen calls.
///
/// This property corresponds to the GCC 'pure' attribute.
bool onlyReadsMemory(const Function *F) {
return getMemoryEffects(F).onlyReadsMemory();
}
/// Check whether or not an instruction may read or write the optionally
/// specified memory location.
///
/// An instruction that doesn't read or write memory may be trivially LICM'd
/// for example.
///
/// For function calls, this delegates to the alias-analysis specific
/// call-site mod-ref behavior queries. Otherwise it delegates to the specific
/// helpers above.
ModRefInfo getModRefInfo(const Instruction *I,
const std::optional<MemoryLocation> &OptLoc) {
SimpleAAQueryInfo AAQIP(*this);
return getModRefInfo(I, OptLoc, AAQIP);
}
/// A convenience wrapper for constructing the memory location.
ModRefInfo getModRefInfo(const Instruction *I, const Value *P,
LocationSize Size) {
return getModRefInfo(I, MemoryLocation(P, Size));
}
/// Return information about whether a call and an instruction may refer to
/// the same memory locations.
ModRefInfo getModRefInfo(const Instruction *I, const CallBase *Call);
/// Return information about whether a particular call site modifies
/// or reads the specified memory location \p MemLoc before instruction \p I
/// in a BasicBlock.
ModRefInfo callCapturesBefore(const Instruction *I,
const MemoryLocation &MemLoc,
DominatorTree *DT) {
SimpleAAQueryInfo AAQIP(*this);
return callCapturesBefore(I, MemLoc, DT, AAQIP);
}
/// A convenience wrapper to synthesize a memory location.
ModRefInfo callCapturesBefore(const Instruction *I, const Value *P,
LocationSize Size, DominatorTree *DT) {
return callCapturesBefore(I, MemoryLocation(P, Size), DT);
}
/// @}
//===--------------------------------------------------------------------===//
/// \name Higher level methods for querying mod/ref information.
/// @{
/// Check if it is possible for execution of the specified basic block to
/// modify the location Loc.
bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc);
/// A convenience wrapper synthesizing a memory location.
bool canBasicBlockModify(const BasicBlock &BB, const Value *P,
LocationSize Size) {
return canBasicBlockModify(BB, MemoryLocation(P, Size));
}
/// Check if it is possible for the execution of the specified instructions
/// to mod/ref (according to the mode) the location Loc.
///
/// The instructions to consider are all of the instructions in the range of
/// [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block.
bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2,
const MemoryLocation &Loc,
const ModRefInfo Mode);
/// A convenience wrapper synthesizing a memory location.
bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2,
const Value *Ptr, LocationSize Size,
const ModRefInfo Mode) {
return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode);
}
// CtxI can be nullptr, in which case the query is whether or not the aliasing
// relationship holds through the entire function.
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
AAQueryInfo &AAQI, const Instruction *CtxI = nullptr);
bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool OrLocal = false);
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool IgnoreLocals = false);
ModRefInfo getModRefInfo(const Instruction *I, const CallBase *Call2,
AAQueryInfo &AAQIP);
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX,
const MemoryLocation &Loc, AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc,
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const Instruction *I,
const std::optional<MemoryLocation> &OptLoc,
AAQueryInfo &AAQIP);
ModRefInfo callCapturesBefore(const Instruction *I,
const MemoryLocation &MemLoc, DominatorTree *DT,
AAQueryInfo &AAQIP);
MemoryEffects getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI);
private:
class Concept;
template <typename T> class Model;
friend class AAResultBase;
const TargetLibraryInfo &TLI;
std::vector<std::unique_ptr<Concept>> AAs;
std::vector<AnalysisKey *> AADeps;
friend class BatchAAResults;
};
/// This class is a wrapper over an AAResults, and it is intended to be used
/// only when there are no IR changes in between queries. BatchAAResults is
/// reusing the same `AAQueryInfo` to preserve the state across queries,
/// essentially making AA work in "batch mode". The internal state cannot be
/// cleared, so to go "out-of-batch-mode", the user must either use AAResults,
/// or create a new BatchAAResults.
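///
/// A minimal usage sketch (assuming an existing AAResults `AA` and two
/// MemoryLocations `LocA` and `LocB`; not part of the original header):
/// \code
///   BatchAAResults BatchAA(AA);
///   if (BatchAA.alias(LocA, LocB) == AliasResult::NoAlias) {
///     // Subsequent queries reuse the same cached AAQueryInfo state.
///   }
/// \endcode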
class BatchAAResults {
AAResults &AA;
AAQueryInfo AAQI;
SimpleCaptureInfo SimpleCI;
public:
BatchAAResults(AAResults &AAR) : AA(AAR), AAQI(AAR, &SimpleCI) {}
BatchAAResults(AAResults &AAR, CaptureInfo *CI) : AA(AAR), AAQI(AAR, CI) {}
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return AA.alias(LocA, LocB, AAQI);
}
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) {
return AA.pointsToConstantMemory(Loc, AAQI, OrLocal);
}
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc,
bool IgnoreLocals = false) {
return AA.getModRefInfoMask(Loc, AAQI, IgnoreLocals);
}
ModRefInfo getModRefInfo(const Instruction *I,
const std::optional<MemoryLocation> &OptLoc) {
return AA.getModRefInfo(I, OptLoc, AAQI);
}
ModRefInfo getModRefInfo(const Instruction *I, const CallBase *Call2) {
return AA.getModRefInfo(I, Call2, AAQI);
}
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
return AA.getArgModRefInfo(Call, ArgIdx);
}
MemoryEffects getMemoryEffects(const CallBase *Call) {
return AA.getMemoryEffects(Call, AAQI);
}
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return alias(LocA, LocB) == AliasResult::MustAlias;
}
bool isMustAlias(const Value *V1, const Value *V2) {
return alias(MemoryLocation(V1, LocationSize::precise(1)),
MemoryLocation(V2, LocationSize::precise(1))) ==
AliasResult::MustAlias;
}
ModRefInfo callCapturesBefore(const Instruction *I,
const MemoryLocation &MemLoc,
DominatorTree *DT) {
return AA.callCapturesBefore(I, MemLoc, DT, AAQI);
}
/// Assume that values may come from different cycle iterations.
void enableCrossIterationMode() {
AAQI.MayBeCrossIteration = true;
}
};
/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
/// pointer or reference.
using AliasAnalysis = AAResults;
/// A private abstract base class describing the concept of an individual alias
/// analysis implementation.
///
/// This interface is implemented by any \c Model instantiation. It is also the
/// interface which a type used to instantiate the model must provide.
///
/// All of these methods model methods by the same name in the \c
/// AAResults class. Only differences and specifics to how the
/// implementations are called are documented here.
class AAResults::Concept {
public:
virtual ~Concept() = 0;
//===--------------------------------------------------------------------===//
/// \name Alias Queries
/// @{
/// The main low level interface to the alias analysis implementation.
/// Returns an AliasResult indicating whether the two pointers are aliased to
/// each other. This is the interface that must be implemented by specific
/// alias analysis implementations.
virtual AliasResult alias(const MemoryLocation &LocA,
const MemoryLocation &LocB, AAQueryInfo &AAQI,
const Instruction *CtxI) = 0;
/// @}
//===--------------------------------------------------------------------===//
/// \name Simple mod/ref information
/// @{
/// Returns a bitmask that should be unconditionally applied to the ModRef
/// info of a memory location. This allows us to eliminate Mod and/or Ref from
/// the ModRef info based on the knowledge that the memory location points to
/// constant and/or locally-invariant memory.
virtual ModRefInfo getModRefInfoMask(const MemoryLocation &Loc,
AAQueryInfo &AAQI,
bool IgnoreLocals) = 0;
/// Get the ModRef info associated with a pointer argument of a callsite. The
/// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
/// that these bits do not necessarily account for the overall behavior of
/// the function, but rather only provide additional per-argument
/// information.
virtual ModRefInfo getArgModRefInfo(const CallBase *Call,
unsigned ArgIdx) = 0;
/// Return the behavior of the given call site.
virtual MemoryEffects getMemoryEffects(const CallBase *Call,
AAQueryInfo &AAQI) = 0;
/// Return the behavior when calling the given function.
virtual MemoryEffects getMemoryEffects(const Function *F) = 0;
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
virtual ModRefInfo getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc,
AAQueryInfo &AAQI) = 0;
/// Return information about whether two call sites may refer to the same set
/// of memory locations. See the AA documentation for details:
/// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
AAQueryInfo &AAQI) = 0;
/// @}
};
/// A private class template which derives from \c Concept and wraps some other
/// type.
///
/// This models the concept by directly forwarding each interface point to the
/// wrapped type which must implement a compatible interface. This provides
/// a type erased binding.
template <typename AAResultT> class AAResults::Model final : public Concept {
AAResultT &Result;
public:
explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) {}
~Model() override = default;
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
AAQueryInfo &AAQI, const Instruction *CtxI) override {
return Result.alias(LocA, LocB, AAQI, CtxI);
}
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool IgnoreLocals) override {
return Result.getModRefInfoMask(Loc, AAQI, IgnoreLocals);
}
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override {
return Result.getArgModRefInfo(Call, ArgIdx);
}
MemoryEffects getMemoryEffects(const CallBase *Call,
AAQueryInfo &AAQI) override {
return Result.getMemoryEffects(Call, AAQI);
}
MemoryEffects getMemoryEffects(const Function *F) override {
return Result.getMemoryEffects(F);
}
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
AAQueryInfo &AAQI) override {
return Result.getModRefInfo(Call, Loc, AAQI);
}
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
AAQueryInfo &AAQI) override {
return Result.getModRefInfo(Call1, Call2, AAQI);
}
};
/// A base class to help implement the function alias analysis results concept.
///
/// Because of the nature of many alias analysis implementations, they often
/// only implement a subset of the interface. This base class will attempt to
/// implement the remaining portions of the interface in terms of simpler forms
/// of the interface where possible, and otherwise provide conservatively
/// correct fallback implementations.
///
/// Implementors of an alias analysis should derive from this class, and then
/// override specific methods that they wish to customize. There is no need to
/// use virtual anywhere.
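///
/// A minimal sketch of such a derived type (the name `SimpleCustomAA` is
/// hypothetical; not part of the original header):
/// \code
///   class SimpleCustomAA : public AAResultBase {
///   public:
///     AliasResult alias(const MemoryLocation &LocA,
///                       const MemoryLocation &LocB, AAQueryInfo &AAQI,
///                       const Instruction *I) {
///       // Answer only what this analysis understands; otherwise fall
///       // back to the conservative MayAlias default.
///       return AliasResult::MayAlias;
///     }
///   };
/// \endcode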
class AAResultBase {
protected:
explicit AAResultBase() = default;
// Provide all the copy and move constructors so that derived types aren't
// constrained.
AAResultBase(const AAResultBase &Arg) {}
AAResultBase(AAResultBase &&Arg) {}
public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
AAQueryInfo &AAQI, const Instruction *I) {
return AliasResult::MayAlias;
}
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool IgnoreLocals) {
return ModRefInfo::ModRef;
}
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
return ModRefInfo::ModRef;
}
MemoryEffects getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI) {
return MemoryEffects::unknown();
}
MemoryEffects getMemoryEffects(const Function *F) {
return MemoryEffects::unknown();
}
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
AAQueryInfo &AAQI) {
return ModRefInfo::ModRef;
}
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
AAQueryInfo &AAQI) {
return ModRefInfo::ModRef;
}
};
/// Return true if this pointer is returned by a noalias function.
bool isNoAliasCall(const Value *V);
/// Return true if this pointer refers to a distinct and identifiable object.
/// This returns true for:
/// Global Variables and Functions (but not Global Aliases)
/// Allocas
/// ByVal and NoAlias Arguments
/// NoAlias returns (e.g. calls to malloc)
///
bool isIdentifiedObject(const Value *V);
/// Return true if V is unambiguously identified at the function level.
/// Different IdentifiedFunctionLocals can't alias.
/// Further, an IdentifiedFunctionLocal cannot alias with any function
/// arguments other than itself, which is not necessarily true for
/// IdentifiedObjects.
bool isIdentifiedFunctionLocal(const Value *V);
/// Returns true if the pointer is one which would have been considered an
/// escape by isNonEscapingLocalObject.
bool isEscapeSource(const Value *V);
/// Return true if Object memory is not visible after an unwind, in the sense
/// that program semantics cannot depend on Object containing any particular
/// value on unwind. If the RequiresNoCaptureBeforeUnwind out parameter is set
/// to true, then the memory is only not visible if the object has not been
/// captured prior to the unwind. Otherwise it is not visible even if captured.
bool isNotVisibleOnUnwind(const Value *Object,
bool &RequiresNoCaptureBeforeUnwind);
/// A manager for alias analyses.
///
/// This class can have analyses registered with it and when run, it will run
/// all of them and aggregate their results into single AA results interface
/// that dispatches across all of the alias analysis results available.
///
/// Note that the order in which analyses are registered is very significant.
/// That is the order in which the results will be aggregated and queried.
///
/// This manager effectively wraps the AnalysisManager for registering alias
/// analyses. When you register your alias analysis with this manager, it will
/// ensure the analysis itself is registered with its AnalysisManager.
///
/// The result of this analysis is only invalidated if one of the particular
/// aggregated AA results ends up being invalidated. This removes the need to
/// explicitly preserve the results of `AAManager`. Note that analyses should no
/// longer be registered once the `AAManager` is run.
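///
/// A minimal registration sketch (assuming a FunctionAnalysisManager `FAM`;
/// `BasicAA` names LLVM's basic alias analysis; not part of the original
/// header):
/// \code
///   AAManager AM;
///   AM.registerFunctionAnalysis<BasicAA>();
///   FAM.registerPass([&] { return std::move(AM); });
/// \endcode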
class AAManager : public AnalysisInfoMixin<AAManager> {
public:
using Result = AAResults;
/// Register a specific AA result.
template <typename AnalysisT> void registerFunctionAnalysis() {
ResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>);
}
/// Register a specific AA result.
template <typename AnalysisT> void registerModuleAnalysis() {
ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>);
}
Result run(Function &F, FunctionAnalysisManager &AM);
private:
friend AnalysisInfoMixin<AAManager>;
static AnalysisKey Key;
SmallVector<void (*)(Function &F, FunctionAnalysisManager &AM,
AAResults &AAResults),
4> ResultGetters;
template <typename AnalysisT>
static void getFunctionAAResultImpl(Function &F,
FunctionAnalysisManager &AM,
AAResults &AAResults) {
AAResults.addAAResult(AM.template getResult<AnalysisT>(F));
AAResults.addAADependencyID(AnalysisT::ID());
}
template <typename AnalysisT>
static void getModuleAAResultImpl(Function &F, FunctionAnalysisManager &AM,
AAResults &AAResults) {
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
if (auto *R =
MAMProxy.template getCachedResult<AnalysisT>(*F.getParent())) {
AAResults.addAAResult(*R);
MAMProxy
.template registerOuterAnalysisInvalidation<AnalysisT, AAManager>();
}
}
};
/// A wrapper pass to provide the legacy pass manager access to a suitably
/// prepared AAResults object.
class AAResultsWrapperPass : public FunctionPass {
std::unique_ptr<AAResults> AAR;
public:
static char ID;
AAResultsWrapperPass();
AAResults &getAAResults() { return *AAR; }
const AAResults &getAAResults() const { return *AAR; }
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
/// A wrapper pass for external alias analyses. This just squirrels away the
/// callback used to run any analyses and register their results.
struct ExternalAAWrapperPass : ImmutablePass {
using CallbackT = std::function<void(Pass &, Function &, AAResults &)>;
CallbackT CB;
static char ID;
ExternalAAWrapperPass();
explicit ExternalAAWrapperPass(CallbackT CB);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
};
FunctionPass *createAAResultsWrapperPass();
/// A wrapper pass around a callback which can be used to populate the
/// AAResults in the AAResultsWrapperPass from an external AA.
///
/// The callback provided here will be used each time we prepare an AAResults
/// object, and will receive a reference to the function wrapper pass, the
/// function, and the AAResults object to populate. This should be used when
/// setting up a custom pass pipeline to inject a hook into the AA results.
ImmutablePass *createExternalAAWrapperPass(
std::function<void(Pass &, Function &, AAResults &)> Callback);
/// A helper for the legacy pass manager to create a \c AAResults
/// object populated to the best of our ability for a particular function when
/// inside of a \c ModulePass or a \c CallGraphSCCPass.
///
/// If a \c ModulePass or a \c CallGraphSCCPass calls \p
/// createLegacyPMAAResults, it also needs to call \p addUsedAAAnalyses in \p
/// getAnalysisUsage.
AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR);
/// A helper for the legacy pass manager to populate \p AU to add uses to make
/// sure the analyses required by \p createLegacyPMAAResults are available.
void getAAResultsAnalysisUsage(AnalysisUsage &AU);
} // end namespace llvm
#endif // LLVM_ANALYSIS_ALIASANALYSIS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 8fcfbdbd6665..951945f7b765 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -1,586 +1,586 @@
//===-- TargetLibraryInfo.h - Library information ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_TARGETLIBRARYINFO_H
#define LLVM_ANALYSIS_TARGETLIBRARYINFO_H
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <optional>
namespace llvm {
template <typename T> class ArrayRef;
class Function;
class Module;
class Triple;
/// Describes a possible vectorization of a function.
/// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized
/// by a factor 'VectorizationFactor'.
struct VecDesc {
StringRef ScalarFnName;
StringRef VectorFnName;
ElementCount VectorizationFactor;
};
enum LibFunc : unsigned {
#define TLI_DEFINE_ENUM
#include "llvm/Analysis/TargetLibraryInfo.def"
NumLibFuncs,
NotLibFunc
};
/// Implementation of the target library information.
///
/// This class constructs tables that hold the target library information and
/// make it available. However, it is somewhat expensive to compute and only
/// depends on the triple. So users typically interact with the \c
/// TargetLibraryInfo wrapper below.
class TargetLibraryInfoImpl {
friend class TargetLibraryInfo;
unsigned char AvailableArray[(NumLibFuncs+3)/4];
DenseMap<unsigned, std::string> CustomNames;
static StringLiteral const StandardNames[NumLibFuncs];
bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param, ShouldSignExtI32Return;
unsigned SizeOfInt;
enum AvailabilityState {
StandardName = 3, // (memset to all ones)
CustomName = 1,
Unavailable = 0 // (memset to all zeros)
};
void setState(LibFunc F, AvailabilityState State) {
AvailableArray[F/4] &= ~(3 << 2*(F&3));
AvailableArray[F/4] |= State << 2*(F&3);
}
AvailabilityState getState(LibFunc F) const {
return static_cast<AvailabilityState>((AvailableArray[F/4] >> 2*(F&3)) & 3);
}
/// Vectorization descriptors - sorted by ScalarFnName.
std::vector<VecDesc> VectorDescs;
/// Scalarization descriptors - same content as VectorDescs but sorted based
/// on VectorFnName rather than ScalarFnName.
std::vector<VecDesc> ScalarDescs;
/// Return true if the function type FTy is valid for the library function
/// F, regardless of whether the function is available.
bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
const Module &M) const;
public:
/// List of known vector-functions libraries.
///
/// The vector-functions library defines which functions are vectorizable
/// and with which factor. The library can be specified by either the
/// frontend or a command-line option, and is then used by
/// addVectorizableFunctionsFromVecLib for filling up the tables of
/// vectorizable functions.
enum VectorLibrary {
NoLibrary, // Don't use any vector library.
Accelerate, // Use Accelerate framework.
DarwinLibSystemM, // Use Darwin's libsystem_m.
LIBMVEC_X86, // GLIBC Vector Math library.
MASSV, // IBM MASS vector library.
SVML, // Intel short vector math library.
SLEEFGNUABI // SLEEF - SIMD Library for Evaluating Elementary Functions.
};
TargetLibraryInfoImpl();
explicit TargetLibraryInfoImpl(const Triple &T);
// Provide value semantics.
TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI);
TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI);
TargetLibraryInfoImpl &operator=(const TargetLibraryInfoImpl &TLI);
TargetLibraryInfoImpl &operator=(TargetLibraryInfoImpl &&TLI);
/// Searches for a particular function name.
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
bool getLibFunc(StringRef funcName, LibFunc &F) const;
/// Searches for a particular function name, also checking that its type is
/// valid for the library function matching that name.
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
///
/// FDecl is assumed to have a parent Module when using this function.
bool getLibFunc(const Function &FDecl, LibFunc &F) const;
/// Forces a function to be marked as unavailable.
void setUnavailable(LibFunc F) {
setState(F, Unavailable);
}
/// Forces a function to be marked as available.
void setAvailable(LibFunc F) {
setState(F, StandardName);
}
/// Forces a function to be marked as available and provides an alternate name
/// that must be used.
void setAvailableWithName(LibFunc F, StringRef Name) {
if (StandardNames[F] != Name) {
setState(F, CustomName);
CustomNames[F] = std::string(Name);
assert(CustomNames.find(F) != CustomNames.end());
} else {
setState(F, StandardName);
}
}
/// Disables all builtins.
///
/// This can be used for options like -fno-builtin.
void disableAllFunctions();
/// Add a set of scalar -> vector mappings, queryable via
/// getVectorizedFunction and getScalarizedFunction.
void addVectorizableFunctions(ArrayRef<VecDesc> Fns);
/// Calls addVectorizableFunctions with a known preset of functions for the
/// given vector library.
void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib,
const llvm::Triple &TargetTriple);
/// Return true if the function F has a vector equivalent with vectorization
/// factor VF.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
return !getVectorizedFunction(F, VF).empty();
}
/// Return true if the function F has a vector equivalent with any
/// vectorization factor.
bool isFunctionVectorizable(StringRef F) const;
/// Return the name of the equivalent of F, vectorized with factor VF. If no
/// such mapping exists, return the empty string.
StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const;
/// Set to true iff i32 parameters to library functions should have signext
/// or zeroext attributes if they correspond to C-level int or unsigned int,
/// respectively.
void setShouldExtI32Param(bool Val) {
ShouldExtI32Param = Val;
}
/// Set to true iff i32 results from library functions should have signext
/// or zeroext attributes if they correspond to C-level int or unsigned int,
/// respectively.
void setShouldExtI32Return(bool Val) {
ShouldExtI32Return = Val;
}
/// Set to true iff i32 parameters to library functions should have signext
/// attribute if they correspond to C-level int or unsigned int.
void setShouldSignExtI32Param(bool Val) {
ShouldSignExtI32Param = Val;
}
/// Set to true iff i32 results from library functions should have signext
/// attribute if they correspond to C-level int or unsigned int.
void setShouldSignExtI32Return(bool Val) {
ShouldSignExtI32Return = Val;
}
/// Returns the size of the wchar_t type in bytes or 0 if the size is unknown.
/// This queries the 'wchar_size' metadata.
unsigned getWCharSize(const Module &M) const;
/// Returns the size of the size_t type in bits.
unsigned getSizeTSize(const Module &M) const;
/// Get size of a C-level int or unsigned int, in bits.
unsigned getIntSize() const {
return SizeOfInt;
}
/// Initialize the C-level size of an integer.
void setIntSize(unsigned Bits) {
SizeOfInt = Bits;
}
/// Returns the largest vectorization factor used in the list of
/// vector functions.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF,
ElementCount &Scalable) const;
/// Returns true if call site / callee has cdecl-compatible calling
/// conventions.
static bool isCallingConvCCompatible(CallBase *CI);
static bool isCallingConvCCompatible(Function *Callee);
};
/// Provides information about what library functions are available for
/// the current target.
///
/// This both allows optimizations to handle them specially and frontends to
/// disable such optimizations through -fno-builtin etc.
class TargetLibraryInfo {
friend class TargetLibraryAnalysis;
friend class TargetLibraryInfoWrapperPass;
/// The global (module level) TLI info.
const TargetLibraryInfoImpl *Impl;
/// Support for -fno-builtin* options as function attributes, overrides
/// information in global TargetLibraryInfoImpl.
BitVector OverrideAsUnavailable;
public:
explicit TargetLibraryInfo(const TargetLibraryInfoImpl &Impl,
std::optional<const Function *> F = std::nullopt)
: Impl(&Impl), OverrideAsUnavailable(NumLibFuncs) {
if (!F)
return;
if ((*F)->hasFnAttribute("no-builtins"))
disableAllFunctions();
else {
// Disable individual libc/libm calls in TargetLibraryInfo.
LibFunc LF;
AttributeSet FnAttrs = (*F)->getAttributes().getFnAttrs();
for (const Attribute &Attr : FnAttrs) {
if (!Attr.isStringAttribute())
continue;
auto AttrStr = Attr.getKindAsString();
if (!AttrStr.consume_front("no-builtin-"))
continue;
if (getLibFunc(AttrStr, LF))
setUnavailable(LF);
}
}
}
// Provide value semantics.
TargetLibraryInfo(const TargetLibraryInfo &TLI) = default;
TargetLibraryInfo(TargetLibraryInfo &&TLI)
: Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {}
TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) = default;
TargetLibraryInfo &operator=(TargetLibraryInfo &&TLI) {
Impl = TLI.Impl;
OverrideAsUnavailable = TLI.OverrideAsUnavailable;
return *this;
}
/// Determine whether a callee with the given TLI can be inlined into
/// caller with this TLI, based on 'nobuiltin' attributes. When requested,
/// allow inlining into a caller with a superset of the callee's nobuiltin
/// attributes, which is conservatively correct.
bool areInlineCompatible(const TargetLibraryInfo &CalleeTLI,
bool AllowCallerSuperset) const {
if (!AllowCallerSuperset)
return OverrideAsUnavailable == CalleeTLI.OverrideAsUnavailable;
BitVector B = OverrideAsUnavailable;
B |= CalleeTLI.OverrideAsUnavailable;
// We can inline if the union of the caller and callee's nobuiltin
// attributes is no stricter than the caller's nobuiltin attributes.
return B == OverrideAsUnavailable;
}
/// Return true if the function type FTy is valid for the library function
/// F, regardless of whether the function is available.
bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
const Module &M) const {
return Impl->isValidProtoForLibFunc(FTy, F, M);
}
/// Searches for a particular function name.
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
bool getLibFunc(StringRef funcName, LibFunc &F) const {
return Impl->getLibFunc(funcName, F);
}
bool getLibFunc(const Function &FDecl, LibFunc &F) const {
return Impl->getLibFunc(FDecl, F);
}
/// If a CallBase does not have the 'nobuiltin' attribute, return true if the
/// called function is a known library function and set F to that function.
bool getLibFunc(const CallBase &CB, LibFunc &F) const {
return !CB.isNoBuiltin() && CB.getCalledFunction() &&
getLibFunc(*(CB.getCalledFunction()), F);
}
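// A minimal usage sketch (assuming a call site `CB` and a TargetLibraryInfo
// `TLI`; not part of the original header):
//
//   LibFunc LF;
//   if (TLI.getLibFunc(CB, LF) && TLI.has(LF) && LF == LibFunc_strlen) {
//     // Treat the call as a known, available strlen.
//   }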
/// Disables all builtins.
///
/// This can be used for options like -fno-builtin.
void disableAllFunctions() LLVM_ATTRIBUTE_UNUSED {
OverrideAsUnavailable.set();
}
/// Forces a function to be marked as unavailable.
void setUnavailable(LibFunc F) LLVM_ATTRIBUTE_UNUSED {
OverrideAsUnavailable.set(F);
}
TargetLibraryInfoImpl::AvailabilityState getState(LibFunc F) const {
if (OverrideAsUnavailable[F])
return TargetLibraryInfoImpl::Unavailable;
return Impl->getState(F);
}
/// Tests whether a library function is available.
bool has(LibFunc F) const {
return getState(F) != TargetLibraryInfoImpl::Unavailable;
}
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
return Impl->isFunctionVectorizable(F, VF);
}
bool isFunctionVectorizable(StringRef F) const {
return Impl->isFunctionVectorizable(F);
}
StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const {
return Impl->getVectorizedFunction(F, VF);
}
/// Tests if the function is both available and a candidate for optimized code
/// generation.
bool hasOptimizedCodeGen(LibFunc F) const {
if (getState(F) == TargetLibraryInfoImpl::Unavailable)
return false;
switch (F) {
default: break;
case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
case LibFunc_sqrtl_finite:
case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl:
case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl:
case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl:
case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl:
case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill:
case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl:
case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl:
case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
case LibFunc_memcpy: case LibFunc_memset: case LibFunc_memmove:
case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy:
return true;
}
return false;
}
StringRef getName(LibFunc F) const {
auto State = getState(F);
if (State == TargetLibraryInfoImpl::Unavailable)
return StringRef();
if (State == TargetLibraryInfoImpl::StandardName)
return Impl->StandardNames[F];
assert(State == TargetLibraryInfoImpl::CustomName);
return Impl->CustomNames.find(F)->second;
}
static void initExtensionsForTriple(bool &ShouldExtI32Param,
bool &ShouldExtI32Return,
bool &ShouldSignExtI32Param,
bool &ShouldSignExtI32Return,
const Triple &T) {
ShouldExtI32Param = ShouldExtI32Return = false;
ShouldSignExtI32Param = ShouldSignExtI32Return = false;
// PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and
// returns corresponding to C-level ints and unsigned ints.
if (T.isPPC64() || T.getArch() == Triple::sparcv9 ||
T.getArch() == Triple::systemz) {
ShouldExtI32Param = true;
ShouldExtI32Return = true;
}
- // Mips and riscv64, on the other hand, needs signext on i32 parameters
- // corresponding to both signed and unsigned ints.
- if (T.isMIPS() || T.isRISCV64()) {
+ // LoongArch, Mips, and riscv64, on the other hand, need signext on i32
+ // parameters corresponding to both signed and unsigned ints.
+ if (T.isLoongArch() || T.isMIPS() || T.isRISCV64()) {
ShouldSignExtI32Param = true;
}
- // riscv64 needs signext on i32 returns corresponding to both signed and
- // unsigned ints.
- if (T.isRISCV64()) {
+ // LoongArch and riscv64 need signext on i32 returns corresponding to both
+ // signed and unsigned ints.
+ if (T.isLoongArch() || T.isRISCV64()) {
ShouldSignExtI32Return = true;
}
}
/// Returns extension attribute kind to be used for i32 parameters
/// corresponding to C-level int or unsigned int. May be zeroext, signext,
/// or none.
private:
static Attribute::AttrKind getExtAttrForI32Param(bool ShouldExtI32Param_,
bool ShouldSignExtI32Param_,
bool Signed = true) {
if (ShouldExtI32Param_)
return Signed ? Attribute::SExt : Attribute::ZExt;
if (ShouldSignExtI32Param_)
return Attribute::SExt;
return Attribute::None;
}
public:
static Attribute::AttrKind getExtAttrForI32Param(const Triple &T,
bool Signed = true) {
bool ShouldExtI32Param, ShouldExtI32Return;
bool ShouldSignExtI32Param, ShouldSignExtI32Return;
initExtensionsForTriple(ShouldExtI32Param, ShouldExtI32Return,
ShouldSignExtI32Param, ShouldSignExtI32Return, T);
return getExtAttrForI32Param(ShouldExtI32Param, ShouldSignExtI32Param,
Signed);
}
Attribute::AttrKind getExtAttrForI32Param(bool Signed = true) const {
return getExtAttrForI32Param(Impl->ShouldExtI32Param,
Impl->ShouldSignExtI32Param, Signed);
}
/// Returns extension attribute kind to be used for i32 return values
/// corresponding to C-level int or unsigned int. May be zeroext, signext,
/// or none.
private:
static Attribute::AttrKind getExtAttrForI32Return(bool ShouldExtI32Return_,
bool ShouldSignExtI32Return_,
bool Signed) {
if (ShouldExtI32Return_)
return Signed ? Attribute::SExt : Attribute::ZExt;
if (ShouldSignExtI32Return_)
return Attribute::SExt;
return Attribute::None;
}
public:
static Attribute::AttrKind getExtAttrForI32Return(const Triple &T,
bool Signed = true) {
bool ShouldExtI32Param, ShouldExtI32Return;
bool ShouldSignExtI32Param, ShouldSignExtI32Return;
initExtensionsForTriple(ShouldExtI32Param, ShouldExtI32Return,
ShouldSignExtI32Param, ShouldSignExtI32Return, T);
return getExtAttrForI32Return(ShouldExtI32Return, ShouldSignExtI32Return,
Signed);
}
Attribute::AttrKind getExtAttrForI32Return(bool Signed = true) const {
return getExtAttrForI32Return(Impl->ShouldExtI32Return,
Impl->ShouldSignExtI32Return, Signed);
}
  // Helper to create an AttributeList for arguments (and the return value)
  // that all have the same signedness. An existing AttributeList may be
  // passed in via AL to have its attributes included in the returned list.
AttributeList getAttrList(LLVMContext *C, ArrayRef<unsigned> ArgNos,
bool Signed, bool Ret = false,
AttributeList AL = AttributeList()) const {
if (auto AK = getExtAttrForI32Param(Signed))
for (auto ArgNo : ArgNos)
AL = AL.addParamAttribute(*C, ArgNo, AK);
if (Ret)
if (auto AK = getExtAttrForI32Return(Signed))
AL = AL.addRetAttribute(*C, AK);
return AL;
}
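  // Illustrative sketch (hypothetical helper, not part of this interface):
  // attaching the extension attributes when declaring an int-returning
  // libcall such as putchar(int). Only getAttrList() reflects the interface
  // above; the rest is ordinary IR construction.
  //
  //   FunctionCallee declarePutChar(Module &M, const TargetLibraryInfo &TLI) {
  //     LLVMContext &Ctx = M.getContext();
  //     Type *I32 = Type::getInt32Ty(Ctx);
  //     // Argument 0 and the return value are C-level ints: both signed.
  //     AttributeList AL = TLI.getAttrList(&Ctx, /*ArgNos=*/{0},
  //                                        /*Signed=*/true, /*Ret=*/true);
  //     return M.getOrInsertFunction(
  //         "putchar", FunctionType::get(I32, {I32}, /*isVarArg=*/false), AL);
  //   }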
/// \copydoc TargetLibraryInfoImpl::getWCharSize()
unsigned getWCharSize(const Module &M) const {
return Impl->getWCharSize(M);
}
/// \copydoc TargetLibraryInfoImpl::getSizeTSize()
unsigned getSizeTSize(const Module &M) const { return Impl->getSizeTSize(M); }
/// \copydoc TargetLibraryInfoImpl::getIntSize()
unsigned getIntSize() const {
return Impl->getIntSize();
}
/// Handle invalidation from the pass manager.
///
/// If we try to invalidate this info, just return false. It cannot become
/// invalid even if the module or function changes.
bool invalidate(Module &, const PreservedAnalyses &,
ModuleAnalysisManager::Invalidator &) {
return false;
}
bool invalidate(Function &, const PreservedAnalyses &,
FunctionAnalysisManager::Invalidator &) {
return false;
}
/// Returns the largest vectorization factor used in the list of
/// vector functions.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF,
ElementCount &ScalableVF) const {
Impl->getWidestVF(ScalarF, FixedVF, ScalableVF);
}
/// Check if the function "F" is listed in a library known to LLVM.
bool isKnownVectorFunctionInLibrary(StringRef F) const {
return this->isFunctionVectorizable(F);
}
};
/// Analysis pass providing the \c TargetLibraryInfo.
///
/// Note that this pass's result cannot be invalidated; it is immutable for
/// the life of the module.
class TargetLibraryAnalysis : public AnalysisInfoMixin<TargetLibraryAnalysis> {
public:
typedef TargetLibraryInfo Result;
/// Default construct the library analysis.
///
/// This will use the module's triple to construct the library info for that
/// module.
TargetLibraryAnalysis() = default;
/// Construct a library analysis with baseline Module-level info.
///
/// This will be supplemented with Function-specific info in the Result.
TargetLibraryAnalysis(TargetLibraryInfoImpl BaselineInfoImpl)
: BaselineInfoImpl(std::move(BaselineInfoImpl)) {}
TargetLibraryInfo run(const Function &F, FunctionAnalysisManager &);
private:
friend AnalysisInfoMixin<TargetLibraryAnalysis>;
static AnalysisKey Key;
std::optional<TargetLibraryInfoImpl> BaselineInfoImpl;
};
class TargetLibraryInfoWrapperPass : public ImmutablePass {
TargetLibraryAnalysis TLA;
std::optional<TargetLibraryInfo> TLI;
virtual void anchor();
public:
static char ID;
TargetLibraryInfoWrapperPass();
explicit TargetLibraryInfoWrapperPass(const Triple &T);
explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI);
TargetLibraryInfo &getTLI(const Function &F) {
FunctionAnalysisManager DummyFAM;
TLI = TLA.run(F, DummyFAM);
return *TLI;
}
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 202fc473f9e4..609a383426d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1,3894 +1,3897 @@
//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that Hexagon uses to lower LLVM code
// into a selection DAG.
//
//===----------------------------------------------------------------------===//
#include "HexagonISelLowering.h"
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "hexagon-lowering"
static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
cl::init(true), cl::Hidden,
cl::desc("Control jump table emission on Hexagon target"));
static cl::opt<bool>
EnableHexSDNodeSched("enable-hexagon-sdnode-sched", cl::Hidden,
cl::desc("Enable Hexagon SDNode scheduling"));
static cl::opt<bool> EnableFastMath("ffast-math", cl::Hidden,
cl::desc("Enable Fast Math processing"));
static cl::opt<int> MinimumJumpTables("minimum-jump-tables", cl::Hidden,
cl::init(5),
cl::desc("Set minimum jump tables"));
static cl::opt<int>
MaxStoresPerMemcpyCL("max-store-memcpy", cl::Hidden, cl::init(6),
cl::desc("Max #stores to inline memcpy"));
static cl::opt<int>
MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", cl::Hidden, cl::init(4),
cl::desc("Max #stores to inline memcpy"));
static cl::opt<int>
MaxStoresPerMemmoveCL("max-store-memmove", cl::Hidden, cl::init(6),
cl::desc("Max #stores to inline memmove"));
static cl::opt<int>
MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", cl::Hidden,
cl::init(4),
cl::desc("Max #stores to inline memmove"));
static cl::opt<int>
MaxStoresPerMemsetCL("max-store-memset", cl::Hidden, cl::init(8),
cl::desc("Max #stores to inline memset"));
static cl::opt<int>
MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", cl::Hidden, cl::init(4),
cl::desc("Max #stores to inline memset"));
static cl::opt<bool> AlignLoads("hexagon-align-loads",
cl::Hidden, cl::init(false),
cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
static cl::opt<bool>
DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
cl::init(false),
cl::desc("Disable minimum alignment of 1 for "
"arguments passed by value on stack"));
namespace {
class HexagonCCState : public CCState {
unsigned NumNamedVarArgParams = 0;
public:
HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
unsigned NumNamedArgs)
: CCState(CC, IsVarArg, MF, locs, C),
NumNamedVarArgParams(NumNamedArgs) {}
unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
};
} // end anonymous namespace
// Implement calling convention for Hexagon.
static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
static const MCPhysReg ArgRegs[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2,
Hexagon::R3, Hexagon::R4, Hexagon::R5
};
const unsigned NumArgRegs = std::size(ArgRegs);
unsigned RegNum = State.getFirstUnallocated(ArgRegs);
// RegNum is an index into ArgRegs: skip a register if RegNum is odd.
if (RegNum != NumArgRegs && RegNum % 2 == 1)
State.AllocateReg(ArgRegs[RegNum]);
// Always return false here, as this function only makes sure that the first
// unallocated register has an even register number and does not actually
// allocate a register for the current argument.
return false;
}
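// Worked example (illustrative): for f(i32 a, i64 b), "a" takes R0, leaving
// R1 as the first unallocated register. Since i64 values must live in
// even-aligned register pairs, CC_SkipOdd allocates (and wastes) R1 so that
// "b" is assigned the aligned pair R3:2 instead of an unaligned R2/R1 split.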
#include "HexagonGenCallingConv.inc"
SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
const {
return SDValue();
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter. Sometimes what we are copying is the end of a
/// larger object, the part that does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(
Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
/*isVolatile=*/false, /*AlwaysInline=*/false,
/*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
}
bool
HexagonTargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
}
// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
// the value is stored in memory pointed to by a pointer passed by the caller.
SDValue
HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze return values of ISD::RET
if (Subtarget.useHVXOps())
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
else
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
SDValue Val = OutVals[i];
switch (VA.getLocInfo()) {
default:
// Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getBitcast(VA.getLocVT(), Val);
break;
case CCValAssign::SExt:
Val = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Val);
break;
case CCValAssign::ZExt:
Val = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Val);
break;
case CCValAssign::AExt:
Val = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Val);
break;
}
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}
bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Don't emit a tail call unless the call instruction itself is marked as one.
return CI->isTailCall();
}
Register HexagonTargetLowering::getRegisterByName(
const char* RegName, LLT VT, const MachineFunction &) const {
  // Just support r19; the Linux kernel uses it.
Register Reg = StringSwitch<Register>(RegName)
.Case("r0", Hexagon::R0)
.Case("r1", Hexagon::R1)
.Case("r2", Hexagon::R2)
.Case("r3", Hexagon::R3)
.Case("r4", Hexagon::R4)
.Case("r5", Hexagon::R5)
.Case("r6", Hexagon::R6)
.Case("r7", Hexagon::R7)
.Case("r8", Hexagon::R8)
.Case("r9", Hexagon::R9)
.Case("r10", Hexagon::R10)
.Case("r11", Hexagon::R11)
.Case("r12", Hexagon::R12)
.Case("r13", Hexagon::R13)
.Case("r14", Hexagon::R14)
.Case("r15", Hexagon::R15)
.Case("r16", Hexagon::R16)
.Case("r17", Hexagon::R17)
.Case("r18", Hexagon::R18)
.Case("r19", Hexagon::R19)
.Case("r20", Hexagon::R20)
.Case("r21", Hexagon::R21)
.Case("r22", Hexagon::R22)
.Case("r23", Hexagon::R23)
.Case("r24", Hexagon::R24)
.Case("r25", Hexagon::R25)
.Case("r26", Hexagon::R26)
.Case("r27", Hexagon::R27)
.Case("r28", Hexagon::R28)
.Case("r29", Hexagon::R29)
.Case("r30", Hexagon::R30)
.Case("r31", Hexagon::R31)
.Case("r1:0", Hexagon::D0)
.Case("r3:2", Hexagon::D1)
.Case("r5:4", Hexagon::D2)
.Case("r7:6", Hexagon::D3)
.Case("r9:8", Hexagon::D4)
.Case("r11:10", Hexagon::D5)
.Case("r13:12", Hexagon::D6)
.Case("r15:14", Hexagon::D7)
.Case("r17:16", Hexagon::D8)
.Case("r19:18", Hexagon::D9)
.Case("r21:20", Hexagon::D10)
.Case("r23:22", Hexagon::D11)
.Case("r25:24", Hexagon::D12)
.Case("r27:26", Hexagon::D13)
.Case("r29:28", Hexagon::D14)
.Case("r31:30", Hexagon::D15)
.Case("sp", Hexagon::R29)
.Case("fp", Hexagon::R30)
.Case("lr", Hexagon::R31)
.Case("p0", Hexagon::P0)
.Case("p1", Hexagon::P1)
.Case("p2", Hexagon::P2)
.Case("p3", Hexagon::P3)
.Case("sa0", Hexagon::SA0)
.Case("lc0", Hexagon::LC0)
.Case("sa1", Hexagon::SA1)
.Case("lc1", Hexagon::LC1)
.Case("m0", Hexagon::M0)
.Case("m1", Hexagon::M1)
.Case("usr", Hexagon::USR)
.Case("ugp", Hexagon::UGP)
.Case("cs0", Hexagon::CS0)
.Case("cs1", Hexagon::CS1)
.Default(Register());
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
}
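// Illustrative trigger (an assumption about frontend usage): a global
// register variable such as
//
//   register unsigned long current asm("r19");
//
// is read through the llvm.read_register intrinsic, which reaches this hook
// with RegName == "r19"; any name outside the table above is a fatal error.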
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
/// being lowered. Returns a SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
if (Subtarget.useHVXOps())
CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
else
CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
SDValue RetVal;
if (RVLocs[i].getValVT() == MVT::i1) {
// Return values of type MVT::i1 require special handling. The reason
// is that MVT::i1 is associated with the PredRegs register class, but
// values of that type are still returned in R0. Generate an explicit
// copy into a predicate register from R0, and treat the value of the
// predicate register as the call result.
auto &MRI = DAG.getMachineFunction().getRegInfo();
SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
MVT::i32, Glue);
// FR0 = (Value, Chain, Glue)
Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
FR0.getValue(0), FR0.getValue(2));
// TPR = (Chain, Glue)
// Don't glue this CopyFromReg, because it copies from a virtual
// register. If it is glued to the call, InstrEmitter will add it
// as an implicit def to the call (EmitMachineNode).
RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
Glue = TPR.getValue(1);
Chain = TPR.getValue(0);
} else {
RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
RVLocs[i].getValVT(), Glue);
Glue = RetVal.getValue(2);
Chain = RetVal.getValue(1);
}
InVals.push_back(RetVal.getValue(0));
}
return Chain;
}
/// LowerCall - Functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
bool DoesNotReturn = CLI.DoesNotReturn;
bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
auto PtrVT = getPointerTy(MF.getDataLayout());
unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
// Linux ABI treats var-arg calls the same way as regular ones.
bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
NumParams);
if (Subtarget.useHVXOps())
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
else if (DisableArgsMinAlignment)
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
if (CLI.IsTailCall) {
bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
IsVarArg, IsStructRet, StructAttrFlag, Outs,
OutVals, Ins, DAG);
for (const CCValAssign &VA : ArgLocs) {
if (VA.isMemLoc()) {
CLI.IsTailCall = false;
break;
}
}
LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
: "Argument must be passed on stack. "
"Not eligible for Tail Call\n"));
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
bool NeedsArgAlign = false;
Align LargestAlignSeen;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Record if we need > 8 byte alignment on an argument.
bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
NeedsArgAlign |= ArgAlign;
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
// Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(VA.getLocVT(), Arg);
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
}
if (VA.isMemLoc()) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
StackPtr.getValueType());
MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
if (ArgAlign)
LargestAlignSeen = std::max(
LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
// is a pointer.
MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
Flags, DAG, dl));
} else {
MachinePointerInfo LocPI = MachinePointerInfo::getStack(
DAG.getMachineFunction(), LocMemOffset);
SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
MemOpChains.push_back(S);
}
continue;
}
    // Arguments that can be passed in registers must be kept in the
    // RegsToPass vector.
if (VA.isRegLoc())
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
}
if (NeedsArgAlign && Subtarget.hasV60Ops()) {
LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
MFI.ensureMaxAlignment(LargestAlignSeen);
}
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
SDValue Glue;
if (!CLI.IsTailCall) {
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
Glue = Chain.getValue(1);
}
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
// The Glue is necessary since all emitted instructions must be
// stuck together.
if (!CLI.IsTailCall) {
for (const auto &R : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
Glue = Chain.getValue(1);
}
} else {
// For tail calls lower the arguments to the 'real' stack slot.
//
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
//
// Do not flag preceding copytoreg stuff together with the following stuff.
Glue = SDValue();
for (const auto &R : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
Glue = Chain.getValue(1);
}
Glue = SDValue();
}
bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
} else if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are
// known live into the call.
for (const auto &R : RegsToPass)
Ops.push_back(DAG.getRegister(R.first, R.second.getValueType()));
const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (Glue.getNode())
Ops.push_back(Glue);
if (CLI.IsTailCall) {
MFI.setHasTailCall();
return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
}
// Set this here because we need to know this for "hasFP" in frame lowering.
// The target-independent code calls getFrameRegister before setting it, and
// getFrameRegister uses hasFP to determine whether the function has FP.
MFI.setHasCalls(true);
unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
Glue = Chain.getValue(1);
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, dl);
Glue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
InVals, OutVals, Callee);
}
/// Returns true, and sets the base pointer, offset pointer, and addressing
/// mode by reference, if this node can be combined with a load / store to
/// form a post-indexed load / store.
bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
if (!LSN)
return false;
EVT VT = LSN->getMemoryVT();
if (!VT.isSimple())
return false;
bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
VT == MVT::v4i16 || VT == MVT::v8i8 ||
Subtarget.isHVXVectorType(VT.getSimpleVT());
if (!IsLegalType)
return false;
if (Op->getOpcode() != ISD::ADD)
return false;
Base = Op->getOperand(0);
Offset = Op->getOperand(1);
if (!isa<ConstantSDNode>(Offset.getNode()))
return false;
AM = ISD::POST_INC;
int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}
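// Example of the post-indexed form this enables (illustrative Hexagon
// assembly): a loop body such as
//
//   r2 = memw(r0++#4)   // load *r0, then r0 += 4 in a single instruction
//
// corresponds to an ISD::LOAD whose address feeds an ISD::ADD with a
// constant offset that isValidAutoIncImm() accepts for the memory type.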
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
unsigned LR = HRI.getRARegister();
if ((Op.getOpcode() != ISD::INLINEASM &&
Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
return Op;
unsigned NumOps = Op.getNumOperands();
if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
--NumOps; // Ignore the flag operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
++i; // Skip the ID value.
switch (InlineAsm::getKind(Flags)) {
default:
llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegUse:
case InlineAsm::Kind_Imm:
case InlineAsm::Kind_Mem:
i += NumVals;
break;
case InlineAsm::Kind_Clobber:
case InlineAsm::Kind_RegDef:
case InlineAsm::Kind_RegDefEarlyClobber: {
for (; NumVals; --NumVals, ++i) {
Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
if (Reg != LR)
continue;
HMFI.setHasClobberLR(true);
return Op;
}
break;
}
}
}
return Op;
}
// Need to transform ISD::PREFETCH into something that doesn't inherit
// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
// SDNPMayStore.
SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
// Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in,
// if the "reg" is fed by an "add".
SDLoc DL(Op);
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}
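// Illustrative source-level trigger (an assumption about frontend input):
//
//   __builtin_prefetch(p);
//
// arrives here as ISD::PREFETCH and is emitted as dcfetch(p + #0); as noted
// above, a pattern may later fold a constant offset into the instruction.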
// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
// is marked as having side-effects, while the register read on Hexagon does
// not have any. TableGen refuses to accept the direct pattern from that node
// to the A4_tfrcpp.
SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDLoc dl(Op);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
}
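// Illustrative trigger (an assumption about frontend input):
//
//   unsigned long long t = __builtin_readcyclecounter();
//
// arrives here as ISD::READCYCLECOUNTER and becomes a plain, side-effect-free
// read of the 64-bit cycle counter register pair via A4_tfrcpp.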
SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
// Lower the hexagon_prefetch builtin to DCFETCH, as above.
if (IntNo == Intrinsic::hexagon_prefetch) {
SDValue Addr = Op.getOperand(2);
SDLoc DL(Op);
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}
return SDValue();
}
SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
SDValue Align = Op.getOperand(2);
SDLoc dl(Op);
ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
unsigned A = AlignConst->getSExtValue();
auto &HFI = *Subtarget.getFrameLowering();
// "Zero" means natural stack alignment.
if (A == 0)
A = HFI.getStackAlign().value();
LLVM_DEBUG({
    dbgs() << __func__ << " Align: " << A << " Size: ";
Size.getNode()->dump(&DAG);
dbgs() << "\n";
});
SDValue AC = DAG.getConstant(A, dl, MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
DAG.ReplaceAllUsesOfValueWith(Op, AA);
return AA;
}
SDValue HexagonTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Linux ABI treats var-arg calls the same way as regular ones.
bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
*DAG.getContext(),
MF.getFunction().getFunctionType()->getNumParams());
if (Subtarget.useHVXOps())
CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
else if (DisableArgsMinAlignment)
CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
  // For LLVM, when a struct is returned by value and is larger than 8 bytes,
  // the first argument is a pointer to the location on the caller's stack
  // where the return value will be stored. For Hexagon, that location on the
  // caller's stack is passed only when the struct size is larger than 8
  // bytes; otherwise no address is passed into the callee and the callee
  // returns the result directly through R0/R1.
auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
switch (RC.getID()) {
case Hexagon::IntRegsRegClassID:
return Reg - Hexagon::R0 + 1;
case Hexagon::DoubleRegsRegClassID:
return (Reg - Hexagon::D0 + 1) * 2;
case Hexagon::HvxVRRegClassID:
return Reg - Hexagon::V0 + 1;
case Hexagon::HvxWRRegClassID:
return (Reg - Hexagon::W0 + 1) * 2;
}
llvm_unreachable("Unexpected register class");
};
auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
HFL.FirstVarArgSavedReg = 0;
HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool ByVal = Flags.isByVal();
// Arguments passed in registers:
// 1. 32- and 64-bit values and HVX vectors are passed directly,
// 2. Large structs are passed via an address, and the address is
// passed in a register.
if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
llvm_unreachable("ByValSize must be bigger than 8 bytes");
bool InReg = VA.isRegLoc() &&
(!ByVal || (ByVal && Flags.getByValSize() > 8));
if (InReg) {
MVT RegVT = VA.getLocVT();
if (VA.getLocInfo() == CCValAssign::BCvt)
RegVT = VA.getValVT();
const TargetRegisterClass *RC = getRegClassFor(RegVT);
Register VReg = MRI.createVirtualRegister(RC);
SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
// Treat values of type MVT::i1 specially: they are passed in
// registers of type i32, but they need to remain as values of
// type i1 for consistency of the argument lowering.
if (VA.getValVT() == MVT::i1) {
assert(RegVT.getSizeInBits() <= 32);
SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
Copy, DAG.getConstant(1, dl, RegVT));
Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
ISD::SETNE);
} else {
#ifndef NDEBUG
unsigned RegSize = RegVT.getSizeInBits();
assert(RegSize == 32 || RegSize == 64 ||
Subtarget.isHVXVectorType(RegVT));
#endif
}
InVals.push_back(Copy);
MRI.addLiveIn(VA.getLocReg(), VReg);
HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
} else {
assert(VA.isMemLoc() && "Argument should be passed in memory");
// If it's a byval parameter, then we need to compute the
// "real" size, not the size of the pointer.
unsigned ObjSize = Flags.isByVal()
? Flags.getByValSize()
: VA.getLocVT().getStoreSizeInBits() / 8;
// Create the frame index object for this incoming parameter.
int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
if (Flags.isByVal()) {
// If it's a pass-by-value aggregate, then do not dereference the stack
// location. Instead, we should generate a reference to the stack
// location.
InVals.push_back(FIN);
} else {
SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI, 0));
InVals.push_back(L);
}
}
}
if (IsVarArg && Subtarget.isEnvironmentMusl()) {
for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
MRI.addLiveIn(Hexagon::R0+i);
}
if (IsVarArg && Subtarget.isEnvironmentMusl()) {
HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
// Create Frame index for the start of register saved area.
int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
bool RequiresPadding = (NumVarArgRegs & 1);
int RegSaveAreaSizePlusPadding = RequiresPadding
? (NumVarArgRegs + 1) * 4
: NumVarArgRegs * 4;
if (RegSaveAreaSizePlusPadding > 0) {
// The offset to saved register area should be 8 byte aligned.
int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
      if (RegAreaStart % 8)
RegAreaStart = (RegAreaStart + 7) & -8;
int RegSaveAreaFrameIndex =
MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
// This will point to the next argument passed via stack.
int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
HMFI.setVarArgsFrameIndex(FI);
} else {
// This will point to the next argument passed via stack, when
// there is no saved register area.
int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
HMFI.setRegSavedAreaStartFrameIndex(FI);
HMFI.setVarArgsFrameIndex(FI);
}
}
if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
// This will point to the next argument passed via stack.
int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
HMFI.setVarArgsFrameIndex(FI);
}
return Chain;
}
SDValue
HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
// VASTART stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
MachineFunction &MF = DAG.getMachineFunction();
HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
if (!Subtarget.isEnvironmentMusl()) {
return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
MachinePointerInfo(SV));
}
auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
auto &HFL = *Subtarget.getFrameLowering();
SDLoc DL(Op);
SmallVector<SDValue, 8> MemOps;
// Get frame index of va_list.
SDValue FIN = Op.getOperand(1);
  // If the first vararg register is odd, add 4 bytes to the start of the
  // saved register area to point to the first register location. This is
  // because the saved register area has to be 8-byte aligned. In case of an
  // odd start register, there will be 4 bytes of padding at the beginning of
  // the saved register area. If all registers are used up, the following
  // condition will handle it correctly.
SDValue SavedRegAreaStartFrameIndex =
DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (HFL.FirstVarArgSavedReg & 1)
SavedRegAreaStartFrameIndex =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
MVT::i32),
DAG.getIntPtrConstant(4, DL));
// Store the saved register area start pointer.
SDValue Store =
DAG.getStore(Op.getOperand(0), DL,
SavedRegAreaStartFrameIndex,
FIN, MachinePointerInfo(SV));
MemOps.push_back(Store);
// Store saved register area end pointer.
FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
FIN, DAG.getIntPtrConstant(4, DL));
Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
PtrVT),
FIN, MachinePointerInfo(SV, 4));
MemOps.push_back(Store);
// Store overflow area pointer.
FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
FIN, DAG.getIntPtrConstant(4, DL));
Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
PtrVT),
FIN, MachinePointerInfo(SV, 8));
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue
HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  // Assert that the Linux ABI is enabled for the current compilation.
assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
SDValue Chain = Op.getOperand(0);
SDValue DestPtr = Op.getOperand(1);
SDValue SrcPtr = Op.getOperand(2);
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
  // The size of the va_list is 12 bytes as it has 3 pointers. Therefore,
  // we need to memcpy 12 bytes from one va_list to the other.
return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
DAG.getIntPtrConstant(12, DL), Align(4),
/*isVolatile*/ false, false, false,
MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
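// For reference, the musl va_list being copied is effectively three pointers
// (a sketch; the field names are illustrative):
//
//   struct __va_list {
//     void *__current_saved_reg_area_pointer; // next register-save slot
//     void *__saved_reg_area_end_pointer;     // end of the register-save area
//     void *__overflow_area_pointer;          // next stack-passed argument
//   };                                        // 3 pointers = 12 bytes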
SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
const SDLoc &dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
MVT ResTy = ty(Op);
MVT OpTy = ty(LHS);
if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
MVT ElemTy = OpTy.getVectorElementType();
assert(ElemTy.isScalarInteger());
MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
OpTy.getVectorNumElements());
return DAG.getSetCC(dl, ResTy,
DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
}
// Treat all other vector types as legal.
if (ResTy.isVector())
return Op;
// Comparisons of short integers should use sign-extend, not zero-extend,
// since we can represent small negative values in the compare instructions.
// The LLVM default is to use zero-extend arbitrarily in these cases.
auto isSExtFree = [this](SDValue N) {
switch (N.getOpcode()) {
case ISD::TRUNCATE: {
// A sign-extend of a truncate of a sign-extend is free.
SDValue Op = N.getOperand(0);
if (Op.getOpcode() != ISD::AssertSext)
return false;
EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ThisBW = ty(N).getSizeInBits();
unsigned OrigBW = OrigTy.getSizeInBits();
// The type that was sign-extended to get the AssertSext must be
// narrower than the type of N (so that N has still the same value
// as the original).
return ThisBW >= OrigBW;
}
case ISD::LOAD:
// We have sign-extended loads.
return true;
}
return false;
};
if (OpTy == MVT::i8 || OpTy == MVT::i16) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
bool IsNegative = C && C->getAPIntValue().isNegative();
if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
return DAG.getSetCC(dl, ResTy,
DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
}
return SDValue();
}
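// Worked example (illustrative IR): for
//
//   %c = icmp slt i16 %x, -1
//
// the negative constant makes sign extension the right choice, so the compare
// is rebuilt as an i32 setcc of sext(%x) against -1 rather than the default
// zero-extended form, which cannot represent small negative immediates.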
SDValue
HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue PredOp = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
MVT OpTy = ty(Op1);
const SDLoc &dl(Op);
if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
MVT ElemTy = OpTy.getVectorElementType();
assert(ElemTy.isScalarInteger());
MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
OpTy.getVectorNumElements());
// Generate (trunc (select (_, sext, sext))).
return DAG.getSExtOrTrunc(
DAG.getSelect(dl, WideTy, PredOp,
DAG.getSExtOrTrunc(Op1, dl, WideTy),
DAG.getSExtOrTrunc(Op2, dl, WideTy)),
dl, OpTy);
}
return SDValue();
}
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
Constant *CVal = nullptr;
bool isVTi1Type = false;
if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
IRBuilder<> IRB(CV->getContext());
SmallVector<Constant*, 128> NewConst;
unsigned VecLen = CV->getNumOperands();
assert(isPowerOf2_32(VecLen) &&
"conversion only supported for pow2 VectorSize");
for (unsigned i = 0; i < VecLen; ++i)
NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
CVal = ConstantVector::get(NewConst);
isVTi1Type = true;
}
}
Align Alignment = CPN->getAlign();
bool IsPositionIndependent = isPositionIndependent();
unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
unsigned Offset = 0;
SDValue T;
if (CPN->isMachineConstantPoolEntry())
T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
Offset, TF);
else if (isVTi1Type)
T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
else
T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
TF);
assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
"Inconsistent target flag encountered");
if (IsPositionIndependent)
return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
}
SDValue
HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
int Idx = cast<JumpTableSDNode>(Op)->getIndex();
if (isPositionIndependent()) {
SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
}
SDValue T = DAG.getTargetJumpTable(Idx, VT);
return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
}
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
MachinePointerInfo());
}
// Return LR, which contains the return address. Mark it an implicit live-in.
Register Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
HRI.getFrameRegister(), VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
return FrameAddr;
}
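// Illustrative mapping (an assumption about the frame layout relied on here):
//
//   void *ra = __builtin_return_address(1);
//
// walks one frame through LowerFRAMEADDR and then loads the saved LR at
// FP+4, matching the constant-4 offset used in LowerRETURNADDR; a depth of
// zero simply copies LR out as an implicit live-in.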
SDValue
HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
SDLoc dl(Op);
return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}
SDValue
HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
auto *GAN = cast<GlobalAddressSDNode>(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto *GV = GAN->getGlobal();
int64_t Offset = GAN->getOffset();
auto &HLOF = *HTM.getObjFileLowering();
Reloc::Model RM = HTM.getRelocationModel();
if (RM == Reloc::Static) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
const GlobalObject *GO = GV->getAliaseeObject();
if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
}
bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
if (UsePCRel) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
HexagonII::MO_PCREL);
return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
}
// Use GOT index.
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
}
// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
Reloc::Model RM = HTM.getRelocationModel();
if (RM == Reloc::Static) {
SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
}
SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
}
SDValue
HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
HexagonII::MO_PCREL);
return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
}
SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
  // Create the operands for the call. The operands should be, in order:
  // 1. The chain SDValue.
  // 2. The callee, which in this case is the global address value.
  // 3. The registers live into the call; in this case it is just R0, as we
  //    have only one argument to pass.
  // 4. Glue.
  // Note: The order is important.
const auto &HRI = *Subtarget.getRegisterInfo();
const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
DAG.getRegisterMask(Mask), Glue };
Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
// Inform MFI that function has calls.
MFI.setAdjustsStack(true);
Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}
//
// Lower using the initial-exec model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
SDLoc dl(GA);
int64_t Offset = GA->getOffset();
auto PtrVT = getPointerTy(DAG.getDataLayout());
// Get the thread pointer.
SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
bool IsPositionIndependent = isPositionIndependent();
unsigned char TF =
IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
// First generate the TLS symbol address
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
Offset, TF);
SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
if (IsPositionIndependent) {
// Generate the GOT pointer in case of position independent code
SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
    // Add the TLS symbol address to the GOT pointer. This gives a
    // GOT-relative relocation for the symbol.
Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
}
  // Load the offset value for the TLS symbol. This offset is relative to
  // the thread pointer.
SDValue LoadOffset =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());
  // The address of the thread-local variable is the sum of the thread
  // pointer and the variable's offset.
return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
}
//
// Lower using the local-exec model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
SDLoc dl(GA);
int64_t Offset = GA->getOffset();
auto PtrVT = getPointerTy(DAG.getDataLayout());
// Get the thread pointer.
SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
// Generate the TLS symbol address
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
HexagonII::MO_TPREL);
SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
  // The address of the thread-local variable is the sum of the thread
  // pointer and the variable's offset.
return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
}
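// Illustrative local-exec sequence (an assembly sketch, not authoritative):
//
//   r0 = ##var@TPREL      // CONST32 of the thread-pointer-relative offset
//   r0 = add(ugp, r0)     // UGP (thread pointer) + offset = address of var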
//
// Lower using the general dynamic model for TLS addresses
//
SDValue
HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
SDLoc dl(GA);
int64_t Offset = GA->getOffset();
auto PtrVT = getPointerTy(DAG.getDataLayout());
// First generate the TLS symbol address
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
HexagonII::MO_GDGOT);
// Then, generate the GOT pointer
SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
// Add the TLS symbol and the GOT pointer
SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
// Copy over the argument to R0
SDValue InFlag;
Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
InFlag = Chain.getValue(1);
unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
: HexagonII::MO_GDPLT;
return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
Hexagon::R0, Flags);
}
//
// Lower TLS addresses.
//
// For now for dynamic models, we only support the general dynamic model.
//
SDValue
HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
switch (HTM.getTLSModel(GA->getGlobal())) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic:
return LowerToTLSGeneralDynamicModel(GA, DAG);
case TLSModel::InitialExec:
return LowerToTLSInitialExecModel(GA, DAG);
case TLSModel::LocalExec:
return LowerToTLSLocalExecModel(GA, DAG);
}
llvm_unreachable("Bogus TLS model");
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
Subtarget(ST) {
auto &HRI = *Subtarget.getRegisterInfo();
setPrefLoopAlignment(Align(16));
setMinFunctionAlignment(Align(4));
setPrefFunctionAlignment(Align(16));
setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
setMaxAtomicSizeInBitsSupported(64);
setMinCmpXchgSizeInBits(32);
if (EnableHexSDNodeSched)
setSchedulingPreference(Sched::VLIW);
else
setSchedulingPreference(Sched::Source);
// Limits for inline expansion of memcpy/memmove
MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
MaxStoresPerMemset = MaxStoresPerMemsetCL;
MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
//
// Set up register classes.
//
addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa
addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
//
// Handling of scalar operations.
//
// All operations default to "legal", except:
// - indexed loads and stores (pre-/post-incremented),
// - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
// ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
// FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
// FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
// which default to "expand" for at least one type.
// Misc operations.
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom legalize GlobalAddress nodes into CONST32.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
// Hexagon needs to optimize cases with negative constants.
setOperationAction(ISD::SETCC, MVT::i8, Custom);
setOperationAction(ISD::SETCC, MVT::i16, Custom);
setOperationAction(ISD::SETCC, MVT::v4i8, Custom);
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
if (Subtarget.isEnvironmentMusl())
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
else
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
if (EmitJumpTables)
setMinimumJumpTableEntries(MinimumJumpTables);
else
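// Effectively disable jump tables by requiring more entries than any
// table could ever hold.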
setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
for (unsigned LegalIntOp :
{ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
setOperationAction(LegalIntOp, MVT::i32, Legal);
setOperationAction(LegalIntOp, MVT::i64, Legal);
}
// Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
// but they only operate on i64.
for (MVT VT : MVT::integer_valuetypes()) {
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SADDO, VT, Expand);
setOperationAction(ISD::SSUBO, VT, Expand);
setOperationAction(ISD::ADDCARRY, VT, Expand);
setOperationAction(ISD::SUBCARRY, VT, Expand);
}
setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i8, Promote);
setOperationAction(ISD::CTLZ, MVT::i16, Promote);
setOperationAction(ISD::CTTZ, MVT::i8, Promote);
setOperationAction(ISD::CTTZ, MVT::i16, Promote);
// Popcount can count # of 1s in i64 but returns i32.
setOperationAction(ISD::CTPOP, MVT::i8, Promote);
setOperationAction(ISD::CTPOP, MVT::i16, Promote);
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
setOperationAction(ISD::FSHL, MVT::i32, Legal);
setOperationAction(ISD::FSHL, MVT::i64, Legal);
setOperationAction(ISD::FSHR, MVT::i32, Legal);
setOperationAction(ISD::FSHR, MVT::i64, Legal);
for (unsigned IntExpOp :
{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
for (MVT VT : MVT::integer_valuetypes())
setOperationAction(IntExpOp, VT, Expand);
}
for (unsigned FPExpOp :
{ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
ISD::FPOW, ISD::FCOPYSIGN}) {
for (MVT VT : MVT::fp_valuetypes())
setOperationAction(FPExpOp, VT, Expand);
}
// No extending loads from i32.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
}
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Turn FP extload into load/fpextend.
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
// Expand BR_CC and SELECT_CC for all integer and fp types.
for (MVT VT : MVT::integer_valuetypes()) {
setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
for (MVT VT : MVT::fp_valuetypes()) {
setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
//
// Handling of vector operations.
//
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
// Integer arithmetic:
ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::SADDO,
ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
// Logical/bit:
ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
- ISD::CTPOP, ISD::CTLZ, ISD::CTTZ,
+ ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::BSWAP, ISD::BITREVERSE,
// Floating point arithmetic/math functions:
ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
// Misc:
ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool,
// Vector:
ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE,
ISD::SPLAT_VECTOR,
};
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
for (unsigned VectExpOp : VectExpOps)
setOperationAction(VectExpOp, VT, Expand);
// Expand all extending loads and truncating stores:
for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
if (TargetVT == VT)
continue;
setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
setTruncStoreAction(VT, TargetVT, Expand);
}
// Normalize all inputs to SELECT to be vectors of i32.
if (VT.getVectorElementType() != MVT::i32) {
MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType(ISD::SELECT, VT, VT32);
}
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
}
// Extending loads from (native) vectors of i8 into (native) vectors of i16
// are legal.
setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
// Types natively supported:
for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom);
setOperationAction(ISD::ADD, NativeVT, Legal);
setOperationAction(ISD::SUB, NativeVT, Legal);
setOperationAction(ISD::MUL, NativeVT, Legal);
setOperationAction(ISD::AND, NativeVT, Legal);
setOperationAction(ISD::OR, NativeVT, Legal);
setOperationAction(ISD::XOR, NativeVT, Legal);
- if (NativeVT.getVectorElementType() != MVT::i1)
+ if (NativeVT.getVectorElementType() != MVT::i1) {
setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
+ setOperationAction(ISD::BSWAP, NativeVT, Legal);
+ setOperationAction(ISD::BITREVERSE, NativeVT, Legal);
+ }
}
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
}
// Custom lower unaligned loads.
// Also, for both loads and stores, verify the alignment of the address
// in case it is a compile-time constant. This is a usability feature to
// provide a meaningful error message to users.
for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
}
// Custom-lower load/stores of boolean vectors.
for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
}
for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16}) {
setCondCodeAction(ISD::SETNE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETGE, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
}
// Custom-lower bitcasts from i8 to v8i1.
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
setOperationAction(ISD::VSELECT, MVT::v4i8, Custom);
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
// V5+.
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
// Special handling for half-precision floating point conversions.
// Lower half float conversions into library calls.
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// Handling of indexed loads/stores: default is "expand".
//
for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
setIndexedLoadAction(ISD::POST_INC, VT, Legal);
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
// Subtarget-specific operation actions.
//
if (Subtarget.hasV60Ops()) {
setOperationAction(ISD::ROTL, MVT::i32, Legal);
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i32, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
}
if (Subtarget.hasV66Ops()) {
setOperationAction(ISD::FADD, MVT::f64, Legal);
setOperationAction(ISD::FSUB, MVT::f64, Legal);
}
if (Subtarget.hasV67Ops()) {
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
setOperationAction(ISD::FMUL, MVT::f64, Legal);
}
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::VSELECT);
if (Subtarget.useHVXOps())
initializeHVXLowering();
computeRegisterProperties(&HRI);
//
// Library calls for unsupported operations
//
bool FastMath = EnableFastMath;
setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
// This is the only fast library function for sqrtd.
if (FastMath)
setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
// Prefix is: nothing for "slow-math",
// "fast2_" for V5+ fast-math double-precision.
// (Actually, keep fast-math and fast-math2 separate for now.)
if (FastMath) {
setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
} else {
setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
}
if (FastMath)
setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
else
setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
// Routines to handle fp16 storage type.
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
setLibcallName(RTLIB::FPROUND_F64_F16, "__truncdfhf2");
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
// These cause problems when the shift amount is non-constant.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((HexagonISD::NodeType)Opcode) {
case HexagonISD::ADDC: return "HexagonISD::ADDC";
case HexagonISD::SUBC: return "HexagonISD::SUBC";
case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
case HexagonISD::CALL: return "HexagonISD::CALL";
case HexagonISD::CALLnr: return "HexagonISD::CALLnr";
case HexagonISD::CALLR: return "HexagonISD::CALLR";
case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
case HexagonISD::CONST32: return "HexagonISD::CONST32";
case HexagonISD::CP: return "HexagonISD::CP";
case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT";
case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
case HexagonISD::INSERT: return "HexagonISD::INSERT";
case HexagonISD::JT: return "HexagonISD::JT";
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
case HexagonISD::VASL: return "HexagonISD::VASL";
case HexagonISD::VASR: return "HexagonISD::VASR";
case HexagonISD::VLSR: return "HexagonISD::VLSR";
case HexagonISD::MFSHL: return "HexagonISD::MFSHL";
case HexagonISD::MFSHR: return "HexagonISD::MFSHR";
case HexagonISD::SSAT: return "HexagonISD::SSAT";
case HexagonISD::USAT: return "HexagonISD::USAT";
case HexagonISD::SMUL_LOHI: return "HexagonISD::SMUL_LOHI";
case HexagonISD::UMUL_LOHI: return "HexagonISD::UMUL_LOHI";
case HexagonISD::USMUL_LOHI: return "HexagonISD::USMUL_LOHI";
case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW";
case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::PTRUE: return "HexagonISD::PTRUE";
case HexagonISD::PFALSE: return "HexagonISD::PFALSE";
case HexagonISD::D2P: return "HexagonISD::D2P";
case HexagonISD::P2D: return "HexagonISD::P2D";
case HexagonISD::V2Q: return "HexagonISD::V2Q";
case HexagonISD::Q2V: return "HexagonISD::Q2V";
case HexagonISD::QCAT: return "HexagonISD::QCAT";
case HexagonISD::QTRUE: return "HexagonISD::QTRUE";
case HexagonISD::QFALSE: return "HexagonISD::QFALSE";
case HexagonISD::TL_EXTEND: return "HexagonISD::TL_EXTEND";
case HexagonISD::TL_TRUNCATE: return "HexagonISD::TL_TRUNCATE";
case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
case HexagonISD::ISEL: return "HexagonISD::ISEL";
case HexagonISD::OP_END: break;
}
return nullptr;
}
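// Check a compile-time-constant load/store address against the alignment
// required by the access. Returns true if the known alignment is sufficient;
// otherwise emits a remark (see below) and returns false so the caller can
// replace the access with a trap.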
bool
HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
const SDLoc &dl, SelectionDAG &DAG) const {
auto *CA = dyn_cast<ConstantSDNode>(Ptr);
if (!CA)
return true;
unsigned Addr = CA->getZExtValue();
Align HaveAlign =
Addr != 0 ? Align(1ull << countTrailingZeros(Addr)) : NeedAlign;
if (HaveAlign >= NeedAlign)
return true;
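// Allocate a custom diagnostic kind from the plugin range so this remark
// can be distinguished from the built-in diagnostics.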
static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();
struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
DiagnosticInfoMisalignedTrap(StringRef M)
: DiagnosticInfo(DK_MisalignedTrap, DS_Remark), Msg(M) {}
void print(DiagnosticPrinter &DP) const override {
DP << Msg;
}
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_MisalignedTrap;
}
StringRef Msg;
};
std::string ErrMsg;
raw_string_ostream O(ErrMsg);
O << "Misaligned constant address: " << format_hex(Addr, 10)
<< " has alignment " << HaveAlign.value()
<< ", but the memory access requires " << NeedAlign.value();
if (DebugLoc DL = dl.getDebugLoc())
DL.print(O << ", at ");
O << ". The instruction has been replaced with a trap.";
DAG.getContext()->diagnose(DiagnosticInfoMisalignedTrap(O.str()));
return false;
}
SDValue
HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
const {
const SDLoc &dl(Op);
auto *LS = cast<LSBaseSDNode>(Op.getNode());
assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");
SDValue Chain = LS->getChain();
SDValue Trap = DAG.getNode(ISD::TRAP, dl, MVT::Other, Chain);
if (LS->getOpcode() == ISD::LOAD)
return DAG.getMergeValues({DAG.getUNDEF(ty(Op)), Trap}, dl);
return Trap;
}
// Bit-reverse Load Intrinsic: Check if the instruction is a bit-reverse load
// intrinsic.
static bool isBrevLdIntrinsic(const Value *Inst) {
unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
ID == Intrinsic::hexagon_L2_loadri_pbr ||
ID == Intrinsic::hexagon_L2_loadrh_pbr ||
ID == Intrinsic::hexagon_L2_loadruh_pbr ||
ID == Intrinsic::hexagon_L2_loadrb_pbr ||
ID == Intrinsic::hexagon_L2_loadrub_pbr);
}
// Bit-reverse Load Intrinsic: Crawl up and figure out the object from the
// previous instruction. So far we only handle bitcast, extractvalue and
// bit-reverse load intrinsic instructions. Should we handle CGEP?
static Value *getBrevLdObject(Value *V) {
if (Operator::getOpcode(V) == Instruction::ExtractValue ||
Operator::getOpcode(V) == Instruction::BitCast)
V = cast<Operator>(V)->getOperand(0);
else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
V = cast<Instruction>(V)->getOperand(0);
return V;
}
// Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
// a back edge. If the back edge comes from the intrinsic itself, the incoming
// edge is returned.
static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
const BasicBlock *Parent = PN->getParent();
int Idx = -1;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
BasicBlock *Blk = PN->getIncomingBlock(i);
// Determine if the back edge originates from the intrinsic.
if (Blk == Parent) {
Value *BackEdgeVal = PN->getIncomingValue(i);
Value *BaseVal;
// Loop until we return the same Value or we hit IntrBaseVal.
do {
BaseVal = BackEdgeVal;
BackEdgeVal = getBrevLdObject(BackEdgeVal);
} while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
// If getBrevLdObject returns IntrBaseVal, we should return the
// incoming edge.
if (IntrBaseVal == BackEdgeVal)
continue;
Idx = i;
break;
} else // Remember this incoming edge.
Idx = i;
}
assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
return PN->getIncomingValue(Idx);
}
// Bit-reverse Load Intrinsic: Figure out the underlying object the base
// pointer points to, for the bit-reverse load intrinsic. Storing this in the
// memoperand may help alias analysis figure out the dependencies.
static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
Value *IntrBaseVal = V;
Value *BaseVal;
// Loop until we return the same Value; at that point we have either
// identified the object or hit a PHI.
do {
BaseVal = V;
V = getBrevLdObject(V);
} while (BaseVal != V);
// Identify the object from PHINode.
if (const PHINode *PN = dyn_cast<PHINode>(V))
return returnEdge(PN, IntrBaseVal);
// For non-PHI nodes, the object is the last value returned by getBrevLdObject.
else
return V;
}
/// Given an intrinsic, checks if on the target the intrinsic will need to map
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
/// true and stores the intrinsic information into the IntrinsicInfo that was
/// passed to the function.
bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
case Intrinsic::hexagon_L2_loadrd_pbr:
case Intrinsic::hexagon_L2_loadri_pbr:
case Intrinsic::hexagon_L2_loadrh_pbr:
case Intrinsic::hexagon_L2_loadruh_pbr:
case Intrinsic::hexagon_L2_loadrb_pbr:
case Intrinsic::hexagon_L2_loadrub_pbr: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
auto &Cont = I.getCalledFunction()->getParent()->getContext();
// The intrinsic function call is of the form { ElTy, i8* }
// @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
// should be derived from ElTy.
Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
Info.memVT = MVT::getVT(ElTy);
llvm::Value *BasePtrVal = I.getOperand(0);
Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
// The offset value comes through the Modifier register. For now, assume the
// offset is 0.
Info.offset = 0;
Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::hexagon_V6_vgathermw:
case Intrinsic::hexagon_V6_vgathermw_128B:
case Intrinsic::hexagon_V6_vgathermh:
case Intrinsic::hexagon_V6_vgathermh_128B:
case Intrinsic::hexagon_V6_vgathermhw:
case Intrinsic::hexagon_V6_vgathermhw_128B:
case Intrinsic::hexagon_V6_vgathermwq:
case Intrinsic::hexagon_V6_vgathermwq_128B:
case Intrinsic::hexagon_V6_vgathermhq:
case Intrinsic::hexagon_V6_vgathermhq_128B:
case Intrinsic::hexagon_V6_vgathermhwq:
case Intrinsic::hexagon_V6_vgathermhwq_128B: {
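// The vgather intrinsics read the gathered elements and also write the
// result out through the pointer operand, hence the combined
// load/store/volatile flags below.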
const Module &M = *I.getParent()->getParent()->getParent();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Type *VecTy = I.getArgOperand(1)->getType();
Info.memVT = MVT::getVT(VecTy);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align =
MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
default:
break;
}
return false;
}
bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
return X.getValueType().isScalarInteger(); // 'tstbit'
}
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isSimple() || !VT2.isSimple())
return false;
return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
}
bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF, EVT VT) const {
return isOperationLegalOrCustom(ISD::FMA, VT);
}
// Should we expand the build vector with shuffles?
bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const {
return false;
}
bool HexagonTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
assert(ResVT.getVectorElementType() == SrcVT.getVectorElementType());
if (!ResVT.isSimple() || !SrcVT.isSimple())
return false;
MVT ResTy = ResVT.getSimpleVT(), SrcTy = SrcVT.getSimpleVT();
if (ResTy.getVectorElementType() != MVT::i1)
return true;
// Non-HVX bool vectors are relatively cheap.
return SrcTy.getVectorNumElements() <= 8;
}
bool HexagonTargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
return Op.getOpcode() == ISD::CONCAT_VECTORS ||
TargetLowering::isTargetCanonicalConstantNode(Op);
}
bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
EVT VT) const {
return true;
}
TargetLoweringBase::LegalizeTypeAction
HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
unsigned VecLen = VT.getVectorMinNumElements();
MVT ElemTy = VT.getVectorElementType();
if (VecLen == 1 || VT.isScalableVector())
return TargetLoweringBase::TypeScalarizeVector;
if (Subtarget.useHVXOps()) {
unsigned Action = getPreferredHvxVectorAction(VT);
if (Action != ~0u)
return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
}
// Always widen (remaining) vectors of i1.
if (ElemTy == MVT::i1)
return TargetLoweringBase::TypeWidenVector;
// Widen non-power-of-2 vectors. Such types cannot be split right now,
// and computeRegisterProperties will override "split" with "widen",
// which can cause other issues.
if (!isPowerOf2_32(VecLen))
return TargetLoweringBase::TypeWidenVector;
return TargetLoweringBase::TypeSplitVector;
}
TargetLoweringBase::LegalizeAction
HexagonTargetLowering::getCustomOperationAction(SDNode &Op) const {
if (Subtarget.useHVXOps()) {
unsigned Action = getCustomHvxOperationAction(Op);
if (Action != ~0u)
return static_cast<TargetLoweringBase::LegalizeAction>(Action);
}
return TargetLoweringBase::Legal;
}
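// Split Addr into a base and a constant offset, looking through an ADD with
// a constant operand.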
std::pair<SDValue, int>
HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
if (Addr.getOpcode() == ISD::ADD) {
SDValue Op1 = Addr.getOperand(1);
if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
return { Addr.getOperand(0), CN->getSExtValue() };
}
return { Addr, 0 };
}
// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
// to select data from, V3 is the permutation.
SDValue
HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
const {
const auto *SVN = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> AM = SVN->getMask();
assert(AM.size() <= 8 && "Unexpected shuffle mask");
unsigned VecLen = AM.size();
MVT VecTy = ty(Op);
assert(!Subtarget.isHVXVectorType(VecTy, true) &&
"HVX shuffles should be legal");
assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
const SDLoc &dl(Op);
// If the inputs are not the same as the output, bail. This is not an
// error situation, but complicates the handling and the default expansion
// (into BUILD_VECTOR) should be adequate.
if (ty(Op0) != VecTy || ty(Op1) != VecTy)
return SDValue();
// Normalize the mask so that the first non-negative index comes from
// the first operand.
SmallVector<int,8> Mask(AM.begin(), AM.end());
unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
if (F == AM.size())
return DAG.getUNDEF(VecTy);
if (AM[F] >= int(VecLen)) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(Op0, Op1);
}
// Express the shuffle mask in terms of bytes.
SmallVector<int,8> ByteMask;
unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
for (int M : Mask) {
if (M < 0) {
for (unsigned j = 0; j != ElemBytes; ++j)
ByteMask.push_back(-1);
} else {
for (unsigned j = 0; j != ElemBytes; ++j)
ByteMask.push_back(M*ElemBytes + j);
}
}
assert(ByteMask.size() <= 8);
// All non-undef (non-negative) indexes are well within [0..127], so they
// fit in a single byte. Build two 64-bit words:
// - MaskIdx where each byte is the corresponding index (for non-negative
// indexes), and 0xFF for negative indexes, and
// - MaskUnd that has 0xFF for each negative index.
uint64_t MaskIdx = 0;
uint64_t MaskUnd = 0;
for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
unsigned S = 8*i;
uint64_t M = ByteMask[i] & 0xFF;
if (M == 0xFF)
MaskUnd |= M << S;
MaskIdx |= M << S;
}
if (ByteMask.size() == 4) {
// Identity.
if (MaskIdx == (0x03020100 | MaskUnd))
return Op0;
// Byte swap.
if (MaskIdx == (0x00010203 | MaskUnd)) {
SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
return DAG.getBitcast(VecTy, T1);
}
// Byte packs.
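// Each byte of MaskIdx holds a source byte index into the Op1:Op0 pair
// (Op0 in the low half); e.g. 0x06040200 selects bytes 0, 2, 4 and 6,
// i.e. the even bytes.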
SDValue Concat10 =
getCombine(Op1, Op0, dl, typeJoin({ty(Op1), ty(Op0)}), DAG);
if (MaskIdx == (0x06040200 | MaskUnd))
return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
if (MaskIdx == (0x07050301 | MaskUnd))
return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
SDValue Concat01 =
getCombine(Op0, Op1, dl, typeJoin({ty(Op0), ty(Op1)}), DAG);
if (MaskIdx == (0x02000604 | MaskUnd))
return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
if (MaskIdx == (0x03010705 | MaskUnd))
return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
}
if (ByteMask.size() == 8) {
// Identity.
if (MaskIdx == (0x0706050403020100ull | MaskUnd))
return Op0;
// Byte swap.
if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
return DAG.getBitcast(VecTy, T1);
}
// Halfword picks.
if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
VectorPair P = opSplit(Op0, dl, DAG);
return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
}
// Byte packs.
if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
}
return SDValue();
}
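// If Op is a splat (a BUILD_VECTOR with one repeated value, or a
// SPLAT_VECTOR), return the splatted scalar, otherwise an empty SDValue.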
SDValue
HexagonTargetLowering::getSplatValue(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
case ISD::BUILD_VECTOR:
if (SDValue S = cast<BuildVectorSDNode>(Op)->getSplatValue())
return S;
break;
case ISD::SPLAT_VECTOR:
return Op.getOperand(0);
}
return SDValue();
}
// Create a Hexagon-specific node for shifting a vector by an integer.
SDValue
HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
const {
unsigned NewOpc;
switch (Op.getOpcode()) {
case ISD::SHL:
NewOpc = HexagonISD::VASL;
break;
case ISD::SRA:
NewOpc = HexagonISD::VASR;
break;
case ISD::SRL:
NewOpc = HexagonISD::VLSR;
break;
default:
llvm_unreachable("Unexpected shift opcode");
}
if (SDValue Sp = getSplatValue(Op.getOperand(1), DAG))
return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), Sp);
return SDValue();
}
SDValue
HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
const SDLoc &dl(Op);
// First try to convert the shift (by vector) to a shift by a scalar.
// If we first split the shift, the shift amount will become 'extract
// subvector', and will no longer be recognized as scalar.
SDValue Res = Op;
if (SDValue S = getVectorShiftByInt(Op, DAG))
Res = S;
unsigned Opc = Res.getOpcode();
switch (Opc) {
case HexagonISD::VASR:
case HexagonISD::VLSR:
case HexagonISD::VASL:
break;
default:
// No instructions for shifts by non-scalars.
return SDValue();
}
MVT ResTy = ty(Res);
if (ResTy.getVectorElementType() != MVT::i8)
return Res;
// For shifts of i8, extend the inputs to i16, then truncate back to i8.
assert(ResTy.getVectorElementType() == MVT::i8);
SDValue Val = Res.getOperand(0), Amt = Res.getOperand(1);
auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
MVT Ty = ty(V);
MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(V, dl, ExtTy)
: DAG.getZExtOrTrunc(V, dl, ExtTy);
SDValue ExtS = DAG.getNode(Opc, dl, ExtTy, {ExtV, A});
return DAG.getZExtOrTrunc(ExtS, dl, Ty);
};
if (ResTy.getSizeInBits() == 32)
return ShiftPartI8(Opc, Val, Amt);
auto [LoV, HiV] = opSplit(Val, dl, DAG);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy,
{ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
}
SDValue
HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
return Op;
return SDValue();
}
SDValue
HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
MVT ResTy = ty(Op);
SDValue InpV = Op.getOperand(0);
MVT InpTy = ty(InpV);
assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
const SDLoc &dl(Op);
// Handle conversion from i8 to v8i1.
if (InpTy == MVT::i8) {
if (ResTy == MVT::v8i1) {
SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
}
return SDValue();
}
return Op;
}
bool
HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
MVT VecTy, SelectionDAG &DAG,
MutableArrayRef<ConstantInt*> Consts) const {
MVT ElemTy = VecTy.getVectorElementType();
unsigned ElemWidth = ElemTy.getSizeInBits();
IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
bool AllConst = true;
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
SDValue V = Values[i];
if (V.isUndef()) {
Consts[i] = ConstantInt::get(IntTy, 0);
continue;
}
// Make sure to always cast to IntTy.
if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
const ConstantInt *CI = CN->getConstantIntValue();
Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
} else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
const ConstantFP *CF = CN->getConstantFPValue();
APInt A = CF->getValueAPF().bitcastToAPInt();
Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
} else {
AllConst = false;
}
}
return AllConst;
}
SDValue
HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
MVT VecTy, SelectionDAG &DAG) const {
MVT ElemTy = VecTy.getVectorElementType();
assert(VecTy.getVectorNumElements() == Elem.size());
SmallVector<ConstantInt*,4> Consts(Elem.size());
bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
unsigned First, Num = Elem.size();
for (First = 0; First != Num; ++First) {
if (!isUndef(Elem[First]))
break;
}
if (First == Num)
return DAG.getUNDEF(VecTy);
if (AllConst &&
llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
return getZero(dl, VecTy, DAG);
if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
assert(Elem.size() == 2);
if (AllConst) {
// The 'Consts' array will have all values as integers regardless
// of the vector element type.
uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
Consts[1]->getZExtValue() << 16;
return DAG.getBitcast(VecTy, DAG.getConstant(V, dl, MVT::i32));
}
SDValue E0, E1;
if (ElemTy == MVT::f16) {
E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[0]), dl, MVT::i32);
E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[1]), dl, MVT::i32);
} else {
E0 = Elem[0];
E1 = Elem[1];
}
SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG);
return DAG.getBitcast(VecTy, N);
}
if (ElemTy == MVT::i8) {
// First try generating a constant.
if (AllConst) {
int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
(Consts[1]->getZExtValue() & 0xFF) << 8 |
(Consts[2]->getZExtValue() & 0xFF) << 16 |
Consts[3]->getZExtValue() << 24;
return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
}
// Then try splat.
bool IsSplat = true;
for (unsigned i = First+1; i != Num; ++i) {
if (Elem[i] == Elem[First] || isUndef(Elem[i]))
continue;
IsSplat = false;
break;
}
if (IsSplat) {
// Legalize the operand of SPLAT_VECTOR.
SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
}
// Generate
// (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
// (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
assert(Elem.size() == 4);
SDValue Vs[4];
for (unsigned i = 0; i != 4; ++i) {
Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
}
SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
return DAG.getBitcast(MVT::v4i8, R);
}
#ifndef NDEBUG
dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
#endif
llvm_unreachable("Unexpected vector element type");
}
SDValue
HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
MVT VecTy, SelectionDAG &DAG) const {
MVT ElemTy = VecTy.getVectorElementType();
assert(VecTy.getVectorNumElements() == Elem.size());
SmallVector<ConstantInt*,8> Consts(Elem.size());
bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
unsigned First, Num = Elem.size();
for (First = 0; First != Num; ++First) {
if (!isUndef(Elem[First]))
break;
}
if (First == Num)
return DAG.getUNDEF(VecTy);
if (AllConst &&
llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
return getZero(dl, VecTy, DAG);
// First try splat if possible.
if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
bool IsSplat = true;
for (unsigned i = First+1; i != Num; ++i) {
if (Elem[i] == Elem[First] || isUndef(Elem[i]))
continue;
IsSplat = false;
break;
}
if (IsSplat) {
// Legalize the operand of SPLAT_VECTOR
SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(MVT::i16, Elem[First])
: Elem[First];
SDValue Ext = DAG.getZExtOrTrunc(S, dl, MVT::i32);
return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
}
}
// Then try constant.
if (AllConst) {
uint64_t Val = 0;
unsigned W = ElemTy.getSizeInBits();
uint64_t Mask = (1ull << W) - 1;
for (unsigned i = 0; i != Num; ++i)
Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
return DAG.getBitcast(VecTy, V0);
}
// Build two 32-bit vectors and concatenate.
MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
SDValue L = (ElemTy == MVT::i32)
? Elem[0]
: buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
SDValue H = (ElemTy == MVT::i32)
? Elem[1]
: buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
return getCombine(H, L, dl, VecTy, DAG);
}
SDValue
HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
const SDLoc &dl, MVT ValTy, MVT ResTy,
SelectionDAG &DAG) const {
MVT VecTy = ty(VecV);
assert(!ValTy.isVector() ||
VecTy.getVectorElementType() == ValTy.getVectorElementType());
if (VecTy.getVectorElementType() == MVT::i1)
return extractVectorPred(VecV, IdxV, dl, ValTy, ResTy, DAG);
unsigned VecWidth = VecTy.getSizeInBits();
unsigned ValWidth = ValTy.getSizeInBits();
unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
assert((VecWidth % ElemWidth) == 0);
assert(VecWidth == 32 || VecWidth == 64);
// Cast everything to scalar integer types.
MVT ScalarTy = tyScalar(VecTy);
VecV = DAG.getBitcast(ScalarTy, VecV);
SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
SDValue ExtV;
if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
unsigned Off = IdxN->getZExtValue() * ElemWidth;
if (VecWidth == 64 && ValWidth == 32) {
assert(Off == 0 || Off == 32);
ExtV = Off == 0 ? LoHalf(VecV, DAG) : HiHalf(VecV, DAG);
} else if (Off == 0 && (ValWidth % 8) == 0) {
ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
} else {
SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
// The return type of EXTRACTU must be the same as the type of the
// input vector.
ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
{VecV, WidthV, OffV});
}
} else {
if (ty(IdxV) != MVT::i32)
IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
DAG.getConstant(ElemWidth, dl, MVT::i32));
ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
{VecV, WidthV, OffV});
}
// Cast ExtV to the requested result type.
ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
ExtV = DAG.getBitcast(ResTy, ExtV);
return ExtV;
}
SDValue
HexagonTargetLowering::extractVectorPred(SDValue VecV, SDValue IdxV,
const SDLoc &dl, MVT ValTy, MVT ResTy,
SelectionDAG &DAG) const {
// Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
// without any coprocessors).
MVT VecTy = ty(VecV);
unsigned VecWidth = VecTy.getSizeInBits();
unsigned ValWidth = ValTy.getSizeInBits();
assert(VecWidth == VecTy.getVectorNumElements() &&
"Vector elements should equal vector width size");
assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
// Check if this is an extract of the lowest bit.
if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
// Extracting the lowest bit is a no-op, but it changes the type,
// so it must be kept as an operation to avoid errors related to
// type mismatches.
if (IdxN->isZero() && ValTy.getSizeInBits() == 1)
return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
}
// If the value extracted is a single bit, use tstbit.
if (ValWidth == 1) {
SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
}
// Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
// a predicate register. The elements of the vector are repeated
// in the register (if necessary) so that the total number is 8.
// The extracted subvector will need to be expanded in such a way.
unsigned Scale = VecWidth / ValWidth;
// Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
// position 0.
assert(ty(IdxV) == MVT::i32);
unsigned VecRep = 8 / VecWidth;
SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
DAG.getConstant(8*VecRep, dl, MVT::i32));
SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
while (Scale > 1) {
// The longest possible subvector is at most 32 bits, so it is always
// contained in the low subregister.
T1 = LoHalf(T1, DAG);
T1 = expandPredicate(T1, dl, DAG);
Scale /= 2;
}
return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
}
SDValue
HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
const SDLoc &dl, MVT ValTy,
SelectionDAG &DAG) const {
MVT VecTy = ty(VecV);
if (VecTy.getVectorElementType() == MVT::i1)
return insertVectorPred(VecV, ValV, IdxV, dl, ValTy, DAG);
unsigned VecWidth = VecTy.getSizeInBits();
unsigned ValWidth = ValTy.getSizeInBits();
assert(VecWidth == 32 || VecWidth == 64);
assert((VecWidth % ValWidth) == 0);
// Cast everything to scalar integer types.
MVT ScalarTy = MVT::getIntegerVT(VecWidth);
// The actual type of ValV may be different than ValTy (which is related
// to the vector type).
unsigned VW = ty(ValV).getSizeInBits();
ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
VecV = DAG.getBitcast(ScalarTy, VecV);
if (VW != VecWidth)
ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
SDValue InsV;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
unsigned W = C->getZExtValue() * ValWidth;
SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
{VecV, ValV, WidthV, OffV});
} else {
if (ty(IdxV) != MVT::i32)
IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
{VecV, ValV, WidthV, OffV});
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
}
SDValue
HexagonTargetLowering::insertVectorPred(SDValue VecV, SDValue ValV,
SDValue IdxV, const SDLoc &dl,
MVT ValTy, SelectionDAG &DAG) const {
MVT VecTy = ty(VecV);
unsigned VecLen = VecTy.getVectorNumElements();
if (ValTy == MVT::i1) {
SDValue ToReg = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
SDValue Ext = DAG.getSExtOrTrunc(ValV, dl, MVT::i32);
SDValue Width = DAG.getConstant(8 / VecLen, dl, MVT::i32);
SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
SDValue Ins =
DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, {ToReg, Ext, Width, Idx});
return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Ins}, DAG);
}
assert(ValTy.getVectorElementType() == MVT::i1);
SDValue ValR = ValTy.isVector()
? DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV)
: DAG.getSExtOrTrunc(ValV, dl, MVT::i64);
unsigned Scale = VecLen / ValTy.getVectorNumElements();
assert(Scale > 1);
for (unsigned R = Scale; R > 1; R /= 2) {
ValR = contractPredicate(ValR, dl, DAG);
ValR = getCombine(DAG.getUNDEF(MVT::i32), ValR, dl, MVT::i64, DAG);
}
SDValue Width = DAG.getConstant(64 / Scale, dl, MVT::i32);
SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
SDValue Ins =
DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, {VecR, ValR, Width, Idx});
return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
}
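// Expand a 32-bit predicate-as-vector value to 64 bits by sign-extending
// each byte to a halfword.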
SDValue
HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
SelectionDAG &DAG) const {
assert(ty(Vec32).getSizeInBits() == 32);
if (isUndef(Vec32))
return DAG.getUNDEF(MVT::i64);
SDValue P = DAG.getBitcast(MVT::v4i8, Vec32);
SDValue X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i16, P);
return DAG.getBitcast(MVT::i64, X);
}
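// Inverse of expandPredicate: narrow a 64-bit value back to 32 bits by
// collecting the even bytes.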
SDValue
HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
SelectionDAG &DAG) const {
assert(ty(Vec64).getSizeInBits() == 64);
if (isUndef(Vec64))
return DAG.getUNDEF(MVT::i32);
// Collect even bytes:
SDValue A = DAG.getBitcast(MVT::v8i8, Vec64);
SDValue S = DAG.getVectorShuffle(MVT::v8i8, dl, A, DAG.getUNDEF(MVT::v8i8),
{0, 2, 4, 6, 1, 3, 5, 7});
return extractVector(S, DAG.getConstant(0, dl, MVT::i32), dl, MVT::v4i8,
MVT::i32, DAG);
}
SDValue
HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
const {
if (Ty.isVector()) {
unsigned W = Ty.getSizeInBits();
if (W <= 64)
return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
}
if (Ty.isInteger())
return DAG.getConstant(0, dl, Ty);
if (Ty.isFloatingPoint())
return DAG.getConstantFP(0.0, dl, Ty);
llvm_unreachable("Invalid type for zero");
}
SDValue
HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
const {
MVT ValTy = ty(Val);
assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
unsigned ValLen = ValTy.getVectorNumElements();
unsigned ResLen = ResTy.getVectorNumElements();
if (ValLen == ResLen)
return Val;
const SDLoc &dl(Val);
assert(ValLen < ResLen);
assert(ResLen % ValLen == 0);
SmallVector<SDValue, 4> Concats = {Val};
for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
Concats.push_back(DAG.getUNDEF(ValTy));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
}
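// Combine Hi and Lo into a single value of type ResTy, with Lo in the low
// half, by forming a BUILD_PAIR of their scalar-integer equivalents.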
SDValue
HexagonTargetLowering::getCombine(SDValue Hi, SDValue Lo, const SDLoc &dl,
MVT ResTy, SelectionDAG &DAG) const {
MVT ElemTy = ty(Hi);
assert(ElemTy == ty(Lo));
if (!ElemTy.isVector()) {
assert(ElemTy.isScalarInteger());
MVT PairTy = MVT::getIntegerVT(2 * ElemTy.getSizeInBits());
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, PairTy, Lo, Hi);
return DAG.getBitcast(ResTy, Pair);
}
unsigned Width = ElemTy.getSizeInBits();
MVT IntTy = MVT::getIntegerVT(Width);
MVT PairTy = MVT::getIntegerVT(2 * Width);
SDValue Pair =
DAG.getNode(ISD::BUILD_PAIR, dl, PairTy,
{DAG.getBitcast(IntTy, Lo), DAG.getBitcast(IntTy, Hi)});
return DAG.getBitcast(ResTy, Pair);
}
SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
unsigned BW = VecTy.getSizeInBits();
const SDLoc &dl(Op);
SmallVector<SDValue,8> Ops;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
Ops.push_back(Op.getOperand(i));
if (BW == 32)
return buildVector32(Ops, dl, VecTy, DAG);
if (BW == 64)
return buildVector64(Ops, dl, VecTy, DAG);
if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
// Check if this is a special case: all-0 or all-1.
bool All0 = true, All1 = true;
for (SDValue P : Ops) {
auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
if (CN == nullptr) {
All0 = All1 = false;
break;
}
uint32_t C = CN->getZExtValue();
All0 &= (C == 0);
All1 &= (C == 1);
}
if (All0)
return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
if (All1)
return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);
// For each i1 element in the resulting predicate register, put 1
// shifted by the index of the element into a general-purpose register,
// then or them together and transfer it back into a predicate register.
SDValue Rs[8];
SDValue Z = getZero(dl, MVT::i32, DAG);
// Always produce 8 bits, repeat inputs if necessary.
unsigned Rep = 8 / VecTy.getVectorNumElements();
for (unsigned i = 0; i != 8; ++i) {
SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
}
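// OR the words together pairwise (a tree reduction) until a single word
// remains.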
for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
for (unsigned i = 0, e = A.size()/2; i != e; ++i)
Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
}
// Move the value directly to a predicate register.
return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
}
return SDValue();
}
SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
const SDLoc &dl(Op);
if (VecTy.getSizeInBits() == 64) {
assert(Op.getNumOperands() == 2);
return getCombine(Op.getOperand(1), Op.getOperand(0), dl, VecTy, DAG);
}
MVT ElemTy = VecTy.getVectorElementType();
if (ElemTy == MVT::i1) {
assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
MVT OpTy = ty(Op.getOperand(0));
// Scale is how many times the operands need to be contracted to match
// the representation in the target register.
unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
assert(Scale == Op.getNumOperands() && Scale > 1);
// First, convert all bool vectors to integers, then generate pairwise
// inserts to form values of doubled length. Up until there are only
// two values left to concatenate, all of these values will fit in a
// 32-bit integer, so keep them as i32 to use 32-bit inserts.
SmallVector<SDValue,4> Words[2];
unsigned IdxW = 0;
for (SDValue P : Op.getNode()->op_values()) {
SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
for (unsigned R = Scale; R > 1; R /= 2) {
W = contractPredicate(W, dl, DAG);
W = getCombine(DAG.getUNDEF(MVT::i32), W, dl, MVT::i64, DAG);
}
W = LoHalf(W, DAG);
Words[IdxW].push_back(W);
}
while (Scale > 2) {
SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
Words[IdxW ^ 1].clear();
for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
// Insert W1 into W0 right next to the significant bits of W0.
SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
{W0, W1, WidthV, WidthV});
Words[IdxW ^ 1].push_back(T);
}
IdxW ^= 1;
Scale /= 2;
}
// At this point there should only be two words left, and Scale should be 2.
assert(Scale == 2 && Words[IdxW].size() == 2);
SDValue WW = getCombine(Words[IdxW][1], Words[IdxW][0], dl, MVT::i64, DAG);
return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
}
return SDValue();
}
SDValue
HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
MVT ElemTy = ty(Vec).getVectorElementType();
return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
}
SDValue
HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
ty(Op), ty(Op), DAG);
}
SDValue
HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
SDLoc(Op), ty(Op).getVectorElementType(), DAG);
}
SDValue
HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue ValV = Op.getOperand(1);
return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
SDLoc(Op), ty(ValV), DAG);
}
bool
HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
// Assuming the caller does not have either a signext or zeroext modifier, and
// only one value is accepted, any reasonable truncation is allowed.
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
// FIXME: in principle up to 64-bit could be made safe, but it would be very
// fragile at the moment: any support for multiple value returns would be
// liable to disallow tail calls involving i64 -> iN truncation in many cases.
return Ty1->getPrimitiveSizeInBits() <= 32;
}
SDValue
HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
MVT Ty = ty(Op);
const SDLoc &dl(Op);
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
MVT MemTy = LN->getMemoryVT().getSimpleVT();
ISD::LoadExtType ET = LN->getExtensionType();
bool LoadPred = MemTy == MVT::v2i1 || MemTy == MVT::v4i1 || MemTy == MVT::v8i1;
if (LoadPred) {
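// A boolean-vector load occupies a single byte in memory. Load that byte
// zero-extended into a 32-bit register here; it is transferred into a
// predicate register (C2_tfrrp) after the generic lowering below.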
SDValue NL = DAG.getLoad(
LN->getAddressingMode(), ISD::ZEXTLOAD, MVT::i32, dl, LN->getChain(),
LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
/*MemoryVT*/ MVT::i8, LN->getAlign(), LN->getMemOperand()->getFlags(),
LN->getAAInfo(), LN->getRanges());
LN = cast<LoadSDNode>(NL.getNode());
}
Align ClaimAlign = LN->getAlign();
if (!validateConstPtrAlignment(LN->getBasePtr(), ClaimAlign, dl, DAG))
return replaceMemWithUndef(Op, DAG);
// Call LowerUnalignedLoad for all loads; it recognizes loads that
// don't need extra aligning.
SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
if (LoadPred) {
SDValue TP = getInstr(Hexagon::C2_tfrrp, dl, MemTy, {LU}, DAG);
if (ET == ISD::SEXTLOAD) {
TP = DAG.getSExtOrTrunc(TP, dl, Ty);
} else if (ET != ISD::NON_EXTLOAD) {
TP = DAG.getZExtOrTrunc(TP, dl, Ty);
}
SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
return DAG.getMergeValues({TP, Ch}, dl);
}
return LU;
}
SDValue
HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
const SDLoc &dl(Op);
StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
SDValue Val = SN->getValue();
MVT Ty = ty(Val);
if (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1) {
// Store the exact predicate (all bits).
SDValue TR = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {Val}, DAG);
SDValue NS = DAG.getTruncStore(SN->getChain(), dl, TR, SN->getBasePtr(),
MVT::i8, SN->getMemOperand());
if (SN->isIndexed()) {
NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
SN->getAddressingMode());
}
SN = cast<StoreSDNode>(NS.getNode());
}
Align ClaimAlign = SN->getAlign();
if (!validateConstPtrAlignment(SN->getBasePtr(), ClaimAlign, dl, DAG))
return replaceMemWithUndef(Op, DAG);
MVT StoreTy = SN->getMemoryVT().getSimpleVT();
Align NeedAlign = Subtarget.getTypeAlignment(StoreTy);
if (ClaimAlign < NeedAlign)
return expandUnalignedStore(SN, DAG);
return SDValue(SN, 0);
}
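/// Lower a load whose claimed alignment is smaller than the natural alignment
/// of its type by issuing two loads at the natural alignment and combining
/// them with VALIGN. Loads that the target-independent expansion can handle
/// are delegated to expandUnalignedLoad.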
SDValue
HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
const {
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
MVT LoadTy = ty(Op);
unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy).value();
unsigned HaveAlign = LN->getAlign().value();
if (HaveAlign >= NeedAlign)
return Op;
const SDLoc &dl(Op);
const DataLayout &DL = DAG.getDataLayout();
LLVMContext &Ctx = *DAG.getContext();
// If the load aligning is disabled or the load can be broken up into two
// smaller legal loads, do the default (target-independent) expansion.
bool DoDefault = false;
// Handle it in the default way if this is an indexed load.
if (!LN->isUnindexed())
DoDefault = true;
if (!AlignLoads) {
if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
*LN->getMemOperand()))
return Op;
DoDefault = true;
}
if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
// The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
: MVT::getVectorVT(MVT::i8, HaveAlign);
DoDefault =
allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
}
if (DoDefault) {
std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
return DAG.getMergeValues({P.first, P.second}, dl);
}
// The code below generates two loads, both aligned as NeedAlign, and
// with the distance of NeedAlign between them. For that to cover the
// bits that need to be loaded (and without overlapping), the size of
// the loads should be equal to NeedAlign. This is true for all loadable
// types, but add an assertion in case something changes in the future.
assert(LoadTy.getSizeInBits() == 8*NeedAlign);
unsigned LoadLen = NeedAlign;
SDValue Base = LN->getBasePtr();
SDValue Chain = LN->getChain();
auto BO = getBaseAndOffset(Base);
unsigned BaseOpc = BO.first.getOpcode();
if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
return Op;
if (BO.second % LoadLen != 0) {
BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
BO.second -= BO.second % LoadLen;
}
SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
DAG.getConstant(NeedAlign, dl, MVT::i32))
: BO.first;
SDValue Base0 =
DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::Fixed(BO.second), dl);
SDValue Base1 = DAG.getMemBasePlusOffset(
BaseNoOff, TypeSize::Fixed(BO.second + LoadLen), dl);
MachineMemOperand *WideMMO = nullptr;
if (MachineMemOperand *MMO = LN->getMemOperand()) {
MachineFunction &MF = DAG.getMachineFunction();
WideMMO = MF.getMachineMemOperand(
MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
MMO->getSuccessOrdering(), MMO->getFailureOrdering());
}
SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
{Load1, Load0, BaseNoOff.getOperand(0)});
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load0.getValue(1), Load1.getValue(1));
SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
return M;
}
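/// Lower UADDO/USUBO with a constant operand of 1: the overflow bit can then
/// be computed with a single compare of the result against 0 (add) or -1
/// (sub). Other cases are left to the default expansion.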
SDValue
HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
auto *CY = dyn_cast<ConstantSDNode>(Y);
if (!CY)
return SDValue();
const SDLoc &dl(Op);
SDVTList VTs = Op.getNode()->getVTList();
assert(VTs.NumVTs == 2);
assert(VTs.VTs[1] == MVT::i1);
unsigned Opc = Op.getOpcode();
if (CY) {
uint32_t VY = CY->getZExtValue();
assert(VY != 0 && "This should have been folded");
// X +/- 1
if (VY != 1)
return SDValue();
if (Opc == ISD::UADDO) {
SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
ISD::SETEQ);
return DAG.getMergeValues({Op, Ov}, dl);
}
if (Opc == ISD::USUBO) {
SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
return DAG.getMergeValues({Op, Ov}, dl);
}
}
return SDValue();
}
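/// ADDCARRY maps directly onto HexagonISD::ADDC. SUBCARRY is expressed via
/// SUBC by inverting both the incoming borrow and the produced carry.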
SDValue
HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
const SDLoc &dl(Op);
unsigned Opc = Op.getOpcode();
SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
if (Opc == ISD::ADDCARRY)
return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
{ X, Y, C });
EVT CarryTy = C.getValueType();
SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
{ X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
SDValue Out[] = { SubC.getValue(0),
DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
return DAG.getMergeValues(Out, dl);
}
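/// Lower EH_RETURN: store the handler address at FP (R30) + 4, pass the stack
/// adjustment offset in R28, and emit the EH_RETURN node.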
SDValue
HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
// Mark function as containing a call to EH_RETURN.
HexagonMachineFunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
FuncInfo->setHasEHReturn();
unsigned OffsetReg = Hexagon::R28;
SDValue StoreAddr =
DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
DAG.getIntPtrConstant(4, dl));
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
// Not needed, we already use it as an explicit input to EH_RETURN.
// MF.getRegInfo().addLiveOut(OffsetReg);
return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
}
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
// Handle INLINEASM first.
if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
return LowerINLINEASM(Op, DAG);
if (isHvxOperation(Op.getNode(), DAG)) {
// If HVX lowering returns nothing, try the default lowering.
if (SDValue V = LowerHvxOperation(Op, DAG))
return V;
}
switch (Opc) {
default:
#ifndef NDEBUG
Op.getNode()->dumpr(&DAG);
if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
errs() << "Error: check for a non-legal type in this operation\n";
#endif
llvm_unreachable("Should not custom lower this!");
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, DAG);
case ISD::STORE: return LowerStore(Op, DAG);
case ISD::UADDO:
case ISD::USUBO: return LowerUAddSubO(Op, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
break;
}
return SDValue();
}
void
HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
if (isHvxOperation(N, DAG)) {
LowerHvxOperationWrapper(N, Results, DAG);
if (!Results.empty())
return;
}
SDValue Op(N, 0);
unsigned Opc = N->getOpcode();
switch (Opc) {
case HexagonISD::SSAT:
case HexagonISD::USAT:
Results.push_back(opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG));
break;
case ISD::STORE:
// We are only custom-lowering stores to verify the alignment of the
// address if it is a compile-time constant. Since a store can be
// modified during type-legalization (the value being stored may need
// legalization), return empty Results here to indicate that we don't
// really make any changes in the custom lowering.
return;
default:
TargetLowering::LowerOperationWrapper(N, Results, DAG);
break;
}
}
void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
if (isHvxOperation(N, DAG)) {
ReplaceHvxNodeResults(N, Results, DAG);
if (!Results.empty())
return;
}
const SDLoc &dl(N);
switch (N->getOpcode()) {
case ISD::SRL:
case ISD::SRA:
case ISD::SHL:
return;
case ISD::BITCAST:
// Handle a bitcast from v8i1 to i8.
if (N->getValueType(0) == MVT::i8) {
if (N->getOperand(0).getValueType() == MVT::v8i1) {
SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
N->getOperand(0), DAG);
SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
Results.push_back(T);
}
}
break;
}
}
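// Target-specific combines: fold truncates of BUILD_PAIR, simplify P2D of
// constant predicates (PTRUE/PFALSE), commute VSELECT over an xor-with-ptrue
// condition, and turn (or (shl x, s), (zext y)) with s >= 32 into a COMBINE.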
SDValue
HexagonTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (isHvxOperation(N, DCI.DAG)) {
if (SDValue V = PerformHvxDAGCombine(N, DCI))
return V;
return SDValue();
}
SDValue Op(N, 0);
const SDLoc &dl(Op);
unsigned Opc = Op.getOpcode();
if (Opc == ISD::TRUNCATE) {
SDValue Op0 = Op.getOperand(0);
// fold (truncate (build pair x, y)) -> (truncate x) or x
if (Op0.getOpcode() == ISD::BUILD_PAIR) {
EVT TruncTy = Op.getValueType();
SDValue Elem0 = Op0.getOperand(0);
// if we match the low element of the pair, just return it.
if (Elem0.getValueType() == TruncTy)
return Elem0;
// otherwise, if the low part is still too large, apply the truncate.
if (Elem0.getValueType().bitsGT(TruncTy))
return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
}
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (Opc == HexagonISD::P2D) {
SDValue P = Op.getOperand(0);
switch (P.getOpcode()) {
case HexagonISD::PTRUE:
return DCI.DAG.getConstant(-1, dl, ty(Op));
case HexagonISD::PFALSE:
return getZero(dl, ty(Op), DCI.DAG);
default:
break;
}
} else if (Opc == ISD::VSELECT) {
// This is pretty much duplicated in HexagonISelLoweringHVX...
//
// (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
SDValue Cond = Op.getOperand(0);
if (Cond->getOpcode() == ISD::XOR) {
SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
if (C1->getOpcode() == HexagonISD::PTRUE) {
SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
Op.getOperand(2), Op.getOperand(1));
return VSel;
}
}
} else if (Opc == ISD::TRUNCATE) {
SDValue Op0 = Op.getOperand(0);
// fold (truncate (build pair x, y)) -> (truncate x) or x
if (Op0.getOpcode() == ISD::BUILD_PAIR) {
MVT TruncTy = ty(Op);
SDValue Elem0 = Op0.getOperand(0);
// if we match the low element of the pair, just return it.
if (ty(Elem0) == TruncTy)
return Elem0;
// otherwise, if the low part is still too large, apply the truncate.
if (ty(Elem0).bitsGT(TruncTy))
return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
}
} else if (Opc == ISD::OR) {
// fold (or (shl xx, s), (zext y)) -> (COMBINE (shl xx, s-32), y)
// if s >= 32
auto fold0 = [&, this](SDValue Op) {
if (ty(Op) != MVT::i64)
return SDValue();
SDValue Shl = Op.getOperand(0);
SDValue Zxt = Op.getOperand(1);
if (Shl.getOpcode() != ISD::SHL)
std::swap(Shl, Zxt);
if (Shl.getOpcode() != ISD::SHL || Zxt.getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
SDValue Z = Zxt.getOperand(0);
auto *Amt = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
if (Amt && Amt->getZExtValue() >= 32 && ty(Z).getSizeInBits() <= 32) {
unsigned A = Amt->getZExtValue();
SDValue S = Shl.getOperand(0);
SDValue T0 = DCI.DAG.getNode(ISD::SHL, dl, ty(S), S,
DCI.DAG.getConstant(32 - A, dl, MVT::i32));
SDValue T1 = DCI.DAG.getZExtOrTrunc(T0, dl, MVT::i32);
SDValue T2 = DCI.DAG.getZExtOrTrunc(Z, dl, MVT::i32);
return DCI.DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {T1, T2});
}
return SDValue();
};
if (SDValue R = fold0(Op))
return R;
}
return SDValue();
}
/// Returns relocation base for the given PIC jumptable.
SDValue
HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
int Idx = cast<JumpTableSDNode>(Table)->getIndex();
EVT VT = Table.getValueType();
SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
TargetLowering::ConstraintType
HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'q':
case 'v':
if (Subtarget.useHVXOps())
return C_RegisterClass;
break;
case 'a':
return C_RegisterClass;
default:
break;
}
}
return TargetLowering::getConstraintType(Constraint);
}
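// Map single-letter constraints to register classes: 'r' selects the 32- or
// 64-bit integer registers depending on the type, 'a' the modifier registers
// M0-M1, and 'q'/'v' the HVX predicate and vector registers.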
std::pair<unsigned, const TargetRegisterClass*>
HexagonTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r': // R0-R31
switch (VT.SimpleTy) {
default:
return {0u, nullptr};
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::f32:
return {0u, &Hexagon::IntRegsRegClass};
case MVT::i64:
case MVT::f64:
return {0u, &Hexagon::DoubleRegsRegClass};
}
break;
case 'a': // M0-M1
if (VT != MVT::i32)
return {0u, nullptr};
return {0u, &Hexagon::ModRegsRegClass};
case 'q': // q0-q3
switch (VT.getSizeInBits()) {
default:
return {0u, nullptr};
case 64:
case 128:
return {0u, &Hexagon::HvxQRRegClass};
}
break;
case 'v': // V0-V31
switch (VT.getSizeInBits()) {
default:
return {0u, nullptr};
case 512:
return {0u, &Hexagon::HvxVRRegClass};
case 1024:
if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
return {0u, &Hexagon::HvxVRRegClass};
return {0u, &Hexagon::HvxWRRegClass};
case 2048:
return {0u, &Hexagon::HvxWRRegClass};
}
break;
default:
return {0u, nullptr};
}
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
return true;
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS, Instruction *I) const {
if (Ty->isSized()) {
// When LSR detects uses of the same base address to access different
// types (e.g. unions), it will assume a conservative type for these
// uses:
// LSR Use: Kind=Address of void in addrspace(4294967295), ...
// The type Ty passed here would then be "void". Skip the alignment
// checks, but do not return false right away, since that confuses
// LSR into crashing.
Align A = DL.getABITypeAlign(Ty);
// The base offset must be a multiple of the alignment.
if (!isAligned(A, AM.BaseOffs))
return false;
// The shifted offset must fit in 11 bits.
if (!isInt<11>(AM.BaseOffs >> Log2(A)))
return false;
}
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
int Scale = AM.Scale;
if (Scale < 0)
Scale = -Scale;
switch (Scale) {
case 0: // No scale reg, "r+i", "r", or just "i".
break;
default: // No scaled addressing mode.
return false;
}
return true;
}
/// Return true if folding a constant offset with the given GlobalAddress is
/// legal. It is frequently not legal in PIC relocation models.
bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
const {
return HTM.getRelocationModel() == Reloc::Static;
}
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return Imm >= -512 && Imm <= 511;
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
SDValue Callee,
CallingConv::ID CalleeCC,
bool IsVarArg,
bool IsCalleeStructRet,
bool IsCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
const Function &CallerF = DAG.getMachineFunction().getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
// ***************************************************************************
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes.
// ***************************************************************************
// If this is a tail call via a function pointer, then don't do it!
if (!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
return false;
}
// Do not optimize if the calling conventions do not match and the conventions
// used are not C or Fast.
if (!CCMatch) {
bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
// If R & E, then ok.
if (!R || !E)
return false;
}
// Do not tail call optimize vararg calls.
if (IsVarArg)
return false;
// Also avoid tail call optimization if either caller or callee uses struct
// return semantics.
if (IsCalleeStructRet || IsCallerStructRet)
return false;
// In addition to the cases above, we also disable tail call optimization if
// the calling convention requires that at least one outgoing argument needs
// to go on the stack. We cannot check that here because at this point that
// information is not available.
return true;
}
/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
///
/// If DstAlign is zero, that means the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero, it means there isn't a need to
/// check it against the alignment requirement, probably because the
/// source does not need to be loaded. If 'IsMemset' is true, that means it's
/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
EVT HexagonTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
if (Op.size() >= 8 && Op.isAligned(Align(8)))
return MVT::i64;
if (Op.size() >= 4 && Op.isAligned(Align(4)))
return MVT::i32;
if (Op.size() >= 2 && Op.isAligned(Align(2)))
return MVT::i16;
return MVT::Other;
}
bool HexagonTargetLowering::allowsMemoryAccess(
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
MVT SVT = VT.getSimpleVT();
if (Subtarget.isHVXVectorType(SVT, true))
return allowsHvxMemoryAccess(SVT, Flags, Fast);
return TargetLoweringBase::allowsMemoryAccess(
Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
}
bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
unsigned *Fast) const {
MVT SVT = VT.getSimpleVT();
if (Subtarget.isHVXVectorType(SVT, true))
return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
if (Fast)
*Fast = 0;
return false;
}
std::pair<const TargetRegisterClass*, uint8_t>
HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
if (Subtarget.isHVXVectorType(VT, true)) {
unsigned BitWidth = VT.getSizeInBits();
unsigned VecWidth = Subtarget.getVectorLength() * 8;
if (VT.getVectorElementType() == MVT::i1)
return std::make_pair(&Hexagon::HvxQRRegClass, 1);
if (BitWidth == VecWidth)
return std::make_pair(&Hexagon::HvxVRRegClass, 1);
assert(BitWidth == 2 * VecWidth);
return std::make_pair(&Hexagon::HvxWRRegClass, 1);
}
return TargetLowering::findRepresentativeClass(TRI, VT);
}
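// Allow load-width reduction (subject to the generic check) except when the
// base address refers to a small-data (GP-relative) object.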
bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy, EVT NewVT) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
return false;
auto *L = cast<LoadSDNode>(Load);
std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
// Small-data object, do not shrink.
if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
return false;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
}
return true;
}
void HexagonTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
AdjustHvxInstrPostInstrSelection(MI, Node);
}
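/// Emit a load-linked operation: a call to the 32- or 64-bit locked-load
/// intrinsic (L2_loadw_locked / L4_loadd_locked), with the address and result
/// bitcast to and from the requested value type.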
Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const {
BasicBlock *BB = Builder.GetInsertBlock();
Module *M = BB->getParent()->getParent();
unsigned SZ = ValueTy->getPrimitiveSizeInBits();
assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
: Intrinsic::hexagon_L4_loadd_locked;
Function *Fn = Intrinsic::getDeclaration(M, IntID);
auto PtrTy = cast<PointerType>(Addr->getType());
PointerType *NewPtrTy =
Builder.getIntNTy(SZ)->getPointerTo(PtrTy->getAddressSpace());
Addr = Builder.CreateBitCast(Addr, NewPtrTy);
Value *Call = Builder.CreateCall(Fn, Addr, "larx");
return Builder.CreateBitCast(Call, ValueTy);
}
/// Perform a store-conditional operation to Addr. Return the status of the
/// store. This should be 0 if the store succeeded, non-zero otherwise.
Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
BasicBlock *BB = Builder.GetInsertBlock();
Module *M = BB->getParent()->getParent();
Type *Ty = Val->getType();
unsigned SZ = Ty->getPrimitiveSizeInBits();
Type *CastTy = Builder.getIntNTy(SZ);
assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
: Intrinsic::hexagon_S4_stored_locked;
Function *Fn = Intrinsic::getDeclaration(M, IntID);
unsigned AS = Addr->getType()->getPointerAddressSpace();
Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
Val = Builder.CreateBitCast(Val, CastTy);
Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
return Ext;
}
TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// Do not expand loads and stores that don't exceed 64 bits.
return LI->getType()->getPrimitiveSizeInBits() > 64
? AtomicExpansionKind::LLOnly
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// Do not expand loads and stores that don't exceed 64 bits.
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64
? AtomicExpansionKind::Expand
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
return AtomicExpansionKind::LLSC;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
index a75ac0e1378e..375e519a6848 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -1,3358 +1,3378 @@
//===- HexagonPatterns.td - Selection Patterns for Hexagon -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Table of contents:
// (0) Definitions
// (1) Immediates
// (2) Type casts
// (3) Extend/truncate/saturate
// (4) Logical
// (5) Compare
// (6) Select
// (7) Insert/extract
// (8) Shift/permute
// (9) Arithmetic/bitwise
// (10) Bit
// (11) PIC
// (12) Load
// (13) Store
// (14) Memop
// (15) Call
// (16) Branch
// (17) Misc
// Guidelines (in no particular order):
// 1. Avoid relying on pattern ordering to give preference to one pattern
// over another, prefer using AddedComplexity instead. The reason for
// this is to avoid unintended consequences (caused by altering the
// order) when making changes. The current order of patterns in this
// file obviously does play some role, but none of the ordering was
// deliberately chosen (other than to create a logical structure of
// this file). When making changes, adding AddedComplexity to existing
// patterns may be needed.
// 2. Maintain the logical structure of the file, try to put new patterns
// in designated sections.
// 3. Do not use A2_combinew instruction directly, use Combinew fragment
// instead. It uses REG_SEQUENCE, which is more amenable to optimizations.
// 4. Most selection macros are based on PatFrags. For DAGs that involve
// SDNodes, use pf1/pf2 to convert them to PatFrags. Use common frags
// whenever possible (see the Definitions section). When adding new
// macro, try to make it general to enable reuse across sections.
// 5. Compound instructions (e.g. Rx+Rs*Rt) are generated under the condition
// that the nested operation has only one use. Having it separated in case
// of multiple uses avoids duplication of (processor) work.
// 6. The v4 vector instructions (64-bit) are treated as core instructions,
// for example, A2_vaddh is in the "arithmetic" section with A2_add.
// 7. When adding a pattern for an instruction with a constant-extendable
// operand, allow all possible kinds of inputs for the immediate value
// (see AnyImm/anyimm and their variants in the Definitions section).
// --(0) Definitions -----------------------------------------------------
//
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
// in the patterns.
def AddrFI: ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
// These complex patterns are not strictly necessary, since global address
// folding will happen during DAG combining. For distinguishing between GA
// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used.
def AddrGA: ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
def AddrGP: ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
def AnyImm: ComplexPattern<i32, 1, "SelectAnyImm", [], []>;
def AnyInt: ComplexPattern<i32, 1, "SelectAnyInt", [], []>;
// Global address or a constant being a multiple of 2^n.
def AnyImm0: ComplexPattern<i32, 1, "SelectAnyImm0", [], []>;
def AnyImm1: ComplexPattern<i32, 1, "SelectAnyImm1", [], []>;
def AnyImm2: ComplexPattern<i32, 1, "SelectAnyImm2", [], []>;
def AnyImm3: ComplexPattern<i32, 1, "SelectAnyImm3", [], []>;
// Type helper frags.
def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
def SDTVecLeaf:
SDTypeProfile<1, 0, [SDTCisVec<0>]>;
def SDTVecVecIntOp:
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
SDTCisVT<3,i32>]>;
def HexagonPTRUE: SDNode<"HexagonISD::PTRUE", SDTVecLeaf>;
def HexagonPFALSE: SDNode<"HexagonISD::PFALSE", SDTVecLeaf>;
def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>;
def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;
def HexagonMULHUS: SDNode<"HexagonISD::MULHUS", SDTIntBinOp>;
def SDTSaturate:
SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, OtherVT>]>;
def HexagonSSAT: SDNode<"HexagonISD::SSAT", SDTSaturate>;
def HexagonUSAT: SDNode<"HexagonISD::USAT", SDTSaturate>;
def ptrue: PatFrag<(ops), (HexagonPTRUE)>;
def pfalse: PatFrag<(ops), (HexagonPFALSE)>;
def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>;
def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
(HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
def ssat: PatFrag<(ops node:$V, node:$Ty), (HexagonSSAT node:$V, node:$Ty)>;
def usat: PatFrag<(ops node:$V, node:$Ty), (HexagonUSAT node:$V, node:$Ty)>;
// Pattern fragments to extract the low and high subregisters from a
// 64-bit value.
-def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
-def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;
+def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG $Rs, isub_lo)>;
+def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG $Rs, isub_hi)>;
def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
}]>;
def IsPow2_32: PatLeaf<(i32 imm), [{
uint32_t V = N->getZExtValue();
return isPowerOf2_32(V);
}]>;
def IsPow2_64: PatLeaf<(i64 imm), [{
uint64_t V = N->getZExtValue();
return isPowerOf2_64(V);
}]>;
def IsNPow2_32: PatLeaf<(i32 imm), [{
uint32_t NV = ~N->getZExtValue();
return isPowerOf2_32(NV);
}]>;
def IsPow2_64L: PatLeaf<(i64 imm), [{
uint64_t V = N->getZExtValue();
return isPowerOf2_64(V) && Log2_64(V) < 32;
}]>;
def IsPow2_64H: PatLeaf<(i64 imm), [{
uint64_t V = N->getZExtValue();
return isPowerOf2_64(V) && Log2_64(V) >= 32;
}]>;
def IsNPow2_64L: PatLeaf<(i64 imm), [{
uint64_t NV = ~N->getZExtValue();
return isPowerOf2_64(NV) && Log2_64(NV) < 32;
}]>;
def IsNPow2_64H: PatLeaf<(i64 imm), [{
uint64_t NV = ~N->getZExtValue();
return isPowerOf2_64(NV) && Log2_64(NV) >= 32;
}]>;
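// IsULE<W,A> / IsUGT<W,A>: match a W-bit unsigned immediate that is <= A
// (respectively > A).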
class IsULE<int Width, int Arg>: PatLeaf<(i32 imm),
"uint64_t V = N->getZExtValue();" #
"return isUInt<" # Width # ">(V) && V <= " # Arg # ";"
>;
class IsUGT<int Width, int Arg>: PatLeaf<(i32 imm),
"uint64_t V = N->getZExtValue();" #
"return isUInt<" # Width # ">(V) && V > " # Arg # ";"
>;
def SDEC1: SDNodeXForm<imm, [{
int32_t V = N->getSExtValue();
return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32);
}]>;
def UDEC1: SDNodeXForm<imm, [{
uint32_t V = N->getZExtValue();
assert(V >= 1);
return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32);
}]>;
def UDEC32: SDNodeXForm<imm, [{
uint32_t V = N->getZExtValue();
assert(V >= 32);
return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32);
}]>;
class Subi<int From>: SDNodeXForm<imm,
"int32_t V = " # From # " - N->getSExtValue();" #
"return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);"
>;
def Log2_32: SDNodeXForm<imm, [{
uint32_t V = N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
}]>;
def Log2_64: SDNodeXForm<imm, [{
uint64_t V = N->getZExtValue();
return CurDAG->getTargetConstant(Log2_64(V), SDLoc(N), MVT::i32);
}]>;
def LogN2_32: SDNodeXForm<imm, [{
uint32_t NV = ~N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
}]>;
def LogN2_64: SDNodeXForm<imm, [{
uint64_t NV = ~N->getZExtValue();
return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32);
}]>;
def NegImm8: SDNodeXForm<imm, [{
int8_t NV = -N->getSExtValue();
return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
}]>;
def NegImm16: SDNodeXForm<imm, [{
int16_t NV = -N->getSExtValue();
return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
}]>;
def NegImm32: SDNodeXForm<imm, [{
int32_t NV = -N->getSExtValue();
return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
}]>;
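// Replicate the low byte (SplatB) or low half-word (SplatH) of an immediate
// across all lanes of a 32-bit value.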
def SplatB: SDNodeXForm<imm, [{
uint32_t V = N->getZExtValue();
assert(isUInt<8>(V) || V >> 8 == 0xFFFFFF);
V &= 0xFF;
uint32_t S = V << 24 | V << 16 | V << 8 | V;
return CurDAG->getTargetConstant(S, SDLoc(N), MVT::i32);
}]>;
def SplatH: SDNodeXForm<imm, [{
uint32_t V = N->getZExtValue();
assert(isUInt<16>(V) || V >> 16 == 0xFFFF);
V &= 0xFFFF;
return CurDAG->getTargetConstant(V << 16 | V, SDLoc(N), MVT::i32);
}]>;
// Helpers for type promotions/contractions.
def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
def ToAext64: OutPatFrag<(ops node:$Rs),
(REG_SEQUENCE DoubleRegs, (i32 (IMPLICIT_DEF)), isub_hi, (i32 $Rs), isub_lo)>;
def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt),
(REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>;
def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
def anyimm: PatLeaf<(i32 AnyImm:$Imm)>;
def anyint: PatLeaf<(i32 AnyInt:$Imm)>;
// Global address or an aligned constant.
def anyimm0: PatLeaf<(i32 AnyImm0:$Addr)>;
def anyimm1: PatLeaf<(i32 AnyImm1:$Addr)>;
def anyimm2: PatLeaf<(i32 AnyImm2:$Addr)>;
def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>;
def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;
def f32zero: PatLeaf<(f32 fpimm:$F), [{
return N->isExactlyValue(APFloat::getZero(APFloat::IEEEsingle(), false));
}]>;
// This complex pattern is really only to detect various forms of
// sign-extension i32->i64. The selected value will be of type i64
// whose low word is the value being extended. The high word is
// unspecified.
def Usxtw: ComplexPattern<i64, 1, "DetectUseSxtw", [], []>;
def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;
def azext: PatFrags<(ops node:$Rs), [(zext node:$Rs), (anyext node:$Rs)]>;
def asext: PatFrags<(ops node:$Rs), [(sext node:$Rs), (anyext node:$Rs)]>;
def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
(PS_fi (i32 AddrFI:$Rs), imm:$off)>;
// Converters from unary/binary SDNode to PatFrag.
class pf1<SDNode Op> : PatFrag<(ops node:$a), (Op node:$a)>;
class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
class Not2<PatFrag P>
: PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
class VNot2<PatFrag P, PatFrag Not>
: PatFrag<(ops node:$A, node:$B), (P node:$A, (Not node:$B))>;
// If there is a constant operand that feeds the and/or instruction,
// do not generate the compound instructions.
// It is not always profitable, as sometimes we end up with a transfer.
// Consider the example below:
// ra = #65820; rb = lsr(rb, #8); rc ^= and (rb, ra)
// Instead this is preferable.
// ra = and (#65820, lsr(ra, #8)); rb = xor(rb, ra)
class Su_ni1<PatFrag Op>
: PatFrag<Op.Operands, !head(Op.Fragments), [{
if (hasOneUse(N)){
// Check if Op1 is an immediate operand.
SDValue Op1 = N->getOperand(1);
return !isa<ConstantSDNode>(Op1);
}
return false;}],
Op.OperandTransform>;
class Su<PatFrag Op>
: PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
Op.OperandTransform>;
// Main selection macros.
class OpR_R_pat<InstHexagon MI, PatFrag Op, ValueType ResVT, PatFrag RegPred>
: Pat<(ResVT (Op RegPred:$Rs)), (MI RegPred:$Rs)>;
class OpR_RI_pat<InstHexagon MI, PatFrag Op, ValueType ResType,
PatFrag RegPred, PatFrag ImmPred>
: Pat<(ResType (Op RegPred:$Rs, ImmPred:$I)),
(MI RegPred:$Rs, imm:$I)>;
class OpR_RR_pat<InstHexagon MI, PatFrag Op, ValueType ResType,
PatFrag RsPred, PatFrag RtPred = RsPred>
: Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
(MI RsPred:$Rs, RtPred:$Rt)>;
class AccRRI_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
PatFrag RegPred, PatFrag ImmPred>
: Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)),
(MI RegPred:$Rx, RegPred:$Rs, imm:$I)>;
class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
PatFrag RxPred, PatFrag RsPred, PatFrag RtPred>
: Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
(MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;
multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val,
InstHexagon InstA, InstHexagon InstB> {
def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B),
(InstA Val:$A, Val:$B)>;
def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$B, Val:$A),
(InstB Val:$A, Val:$B)>;
}
multiclass MinMax_pats<InstHexagon PickT, InstHexagon PickS,
SDPatternOperator Sel, SDPatternOperator CmpOp,
ValueType CmpType, PatFrag CmpPred> {
def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)),
CmpPred:$Vt, CmpPred:$Vs),
(PickT CmpPred:$Vs, CmpPred:$Vt)>;
def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)),
CmpPred:$Vs, CmpPred:$Vt),
(PickS CmpPred:$Vs, CmpPred:$Vt)>;
}
// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
def: Pat<(Ty1 (bitconvert (Ty2 RC:$Val))), (Ty1 RC:$Val)>;
def: Pat<(Ty2 (bitconvert (Ty1 RC:$Val))), (Ty2 RC:$Val)>;
}
// Frags for commonly used SDNodes.
def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
def Smin: pf2<smin>; def Smax: pf2<smax>;
def Umin: pf2<umin>; def Umax: pf2<umax>;
def Rol: pf2<rotl>;
def Fptosi: pf1<fp_to_sint>;
def Fptoui: pf1<fp_to_uint>;
def Sitofp: pf1<sint_to_fp>;
def Uitofp: pf1<uint_to_fp>;
// --(1) Immediate -------------------------------------------------------
//
def Imm64Lo: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(int32_t (N->getSExtValue()),
SDLoc(N), MVT::i32);
}]>;
def Imm64Hi: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(int32_t (N->getSExtValue()>>32),
SDLoc(N), MVT::i32);
}]>;
def SDTHexagonCONST32
: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<0>]>;
def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
def HexagonCONST32: SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
def HexagonCONST32_GP: SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
def TruncI64ToI32: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>;
def: Pat<(HexagonCONST32 tglobaltlsaddr:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCONST32 bbl:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCONST32 tglobaladdr:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCONST32_GP tblockaddress:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCONST32_GP tglobaladdr:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonJT tjumptable:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>;
// The HVX load patterns also match CP directly. Make sure that if
// the selection of this opcode changes, it's updated in all places.
def: Pat<(i1 0), (PS_false)>;
def: Pat<(i1 1), (PS_true)>;
def: Pat<(i64 imm:$v), (CONST64 imm:$v)>,
Requires<[UseSmallData,NotOptTinyCore]>;
def: Pat<(i64 imm:$v),
(Combinew (A2_tfrsi (Imm64Hi $v)), (A2_tfrsi (Imm64Lo $v)))>;
def ftoi : SDNodeXForm<fpimm, [{
APInt I = N->getValueAPF().bitcastToAPInt();
return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N),
MVT::getIntegerVT(I.getBitWidth()));
}]>;
def: Pat<(f32ImmPred:$f), (A2_tfrsi (ftoi $f))>;
def: Pat<(f64ImmPred:$f), (CONST64 (ftoi $f))>;
def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
// --(2) Type cast -------------------------------------------------------
//
def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
// Bit convert 32- and 64-bit types.
// All of these are bitcastable to one another: i32, v2i16, v4i8.
defm: NopCast_pat<i32, v2i16, IntRegs>;
defm: NopCast_pat<i32, v4i8, IntRegs>;
defm: NopCast_pat<v2i16, v4i8, IntRegs>;
// All of these are bitcastable to one another: i64, v2i32, v4i16, v8i8.
defm: NopCast_pat<i64, v2i32, DoubleRegs>;
defm: NopCast_pat<i64, v4i16, DoubleRegs>;
defm: NopCast_pat<i64, v8i8, DoubleRegs>;
defm: NopCast_pat<v2i32, v4i16, DoubleRegs>;
defm: NopCast_pat<v2i32, v8i8, DoubleRegs>;
defm: NopCast_pat<v4i16, v8i8, DoubleRegs>;
// --(3) Extend/truncate/saturate ----------------------------------------
//
def: Pat<(sext_inreg I32:$Rs, i8), (A2_sxtb I32:$Rs)>;
def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>;
def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>;
def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>;
def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>;
def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>;
def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>;
def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>;
def: Pat<(i1 (trunc I32:$Rs)), (S2_tstbit_i I32:$Rs, 0)>;
def: Pat<(i1 (trunc I64:$Rs)), (S2_tstbit_i (LoReg $Rs), 0)>;
let AddedComplexity = 20 in {
def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>;
def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>;
}
// Extensions from i1 or vectors of i1.
def: Pat<(i32 (azext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
def: Pat<(i64 (azext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
(C2_muxii PredRegs:$Pu, -1, 0))>;
def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
def Vsplatpi: OutPatFrag<(ops node:$V),
(Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
def: Pat<(v2i16 (azext V2I1:$Pu)),
(A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
def: Pat<(v2i32 (azext V2I1:$Pu)),
(A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
def: Pat<(v4i8 (azext V4I1:$Pu)),
(A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
def: Pat<(v4i16 (azext V4I1:$Pu)),
(A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
def: Pat<(v8i8 (azext V8I1:$Pu)),
(A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
def: Pat<(v4i16 (azext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (azext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
(Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
(Combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
// Truncate: from vector B copy all 'E'ven 'B'yte elements:
// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
def: Pat<(v4i8 (trunc V4I16:$Rs)),
(S2_vtrunehb V4I16:$Rs)>;
// Truncate: from vector B copy all 'O'dd 'B'yte elements:
// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
// S2_vtrunohb
// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
// S2_vtruneh
def: Pat<(v2i16 (trunc V2I32:$Rs)),
(A2_combine_ll (HiReg $Rs), (LoReg $Rs))>;
// Saturation:
// Note: saturation assumes the same signed-ness for the input and the
// output.
def: Pat<(i32 (ssat I32:$Rs, i8)), (A2_satb I32:$Rs)>;
def: Pat<(i32 (ssat I32:$Rs, i16)), (A2_sath I32:$Rs)>;
def: Pat<(i32 (ssat I64:$Rs, i32)), (A2_sat I64:$Rs)>;
def: Pat<(i32 (usat I32:$Rs, i8)), (A2_satub I32:$Rs)>;
def: Pat<(i32 (usat I32:$Rs, i16)), (A2_satuh I32:$Rs)>;
def: Pat<(i32 (usat I64:$Rs, i32)),
(C2_mux (C2_cmpeqi (HiReg $Rs), (i32 0)), (LoReg $Rs), (i32 -1))>;
def: Pat<(v4i8 (ssat V4I16:$Rs, v4i8)), (S2_vsathb V4I16:$Rs)>;
def: Pat<(v2i16 (ssat V2I32:$Rs, v2i16)), (S2_vsatwh V2I32:$Rs)>;
def: Pat<(v4i8 (usat V4I16:$Rs, v4i8)), (S2_vsathub V4I16:$Rs)>;
def: Pat<(v2i16 (usat V2I32:$Rs, v2i16)), (S2_vsatwuh V2I32:$Rs)>;
// --(4) Logical ---------------------------------------------------------
//
def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>;
def: Pat<(pnot V2I1:$Ps), (C2_not V2I1:$Ps)>;
def: Pat<(pnot V4I1:$Ps), (C2_not V4I1:$Ps)>;
def: Pat<(pnot V8I1:$Ps), (C2_not V8I1:$Ps)>;
def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>;
def: OpR_RR_pat<C2_and, And, i1, I1>;
def: OpR_RR_pat<C2_or, Or, i1, I1>;
def: OpR_RR_pat<C2_xor, Xor, i1, I1>;
def: OpR_RR_pat<C2_andn, Not2<And>, i1, I1>;
def: OpR_RR_pat<C2_orn, Not2<Or>, i1, I1>;
def: AccRRR_pat<C4_and_and, And, Su<And>, I1, I1, I1>;
def: AccRRR_pat<C4_and_or, And, Su< Or>, I1, I1, I1>;
def: AccRRR_pat<C4_or_and, Or, Su<And>, I1, I1, I1>;
def: AccRRR_pat<C4_or_or, Or, Su< Or>, I1, I1, I1>;
def: AccRRR_pat<C4_and_andn, And, Su<Not2<And>>, I1, I1, I1>;
def: AccRRR_pat<C4_and_orn, And, Su<Not2< Or>>, I1, I1, I1>;
def: AccRRR_pat<C4_or_andn, Or, Su<Not2<And>>, I1, I1, I1>;
def: AccRRR_pat<C4_or_orn, Or, Su<Not2< Or>>, I1, I1, I1>;
multiclass BoolvOpR_RR_pat<InstHexagon MI, PatFrag VOp> {
def: OpR_RR_pat<MI, VOp, v2i1, V2I1>;
def: OpR_RR_pat<MI, VOp, v4i1, V4I1>;
def: OpR_RR_pat<MI, VOp, v8i1, V8I1>;
}
multiclass BoolvAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag VOp> {
def: AccRRR_pat<MI, AccOp, VOp, V2I1, V2I1, V2I1>;
def: AccRRR_pat<MI, AccOp, VOp, V4I1, V4I1, V4I1>;
def: AccRRR_pat<MI, AccOp, VOp, V8I1, V8I1, V8I1>;
}
defm: BoolvOpR_RR_pat<C2_and, And>;
defm: BoolvOpR_RR_pat<C2_or, Or>;
defm: BoolvOpR_RR_pat<C2_xor, Xor>;
defm: BoolvOpR_RR_pat<C2_andn, VNot2<And, pnot>>;
defm: BoolvOpR_RR_pat<C2_orn, VNot2< Or, pnot>>;
// op(Ps, op(Pt, Pu))
defm: BoolvAccRRR_pat<C4_and_and, And, Su<And>>;
defm: BoolvAccRRR_pat<C4_and_or, And, Su<Or>>;
defm: BoolvAccRRR_pat<C4_or_and, Or, Su<And>>;
defm: BoolvAccRRR_pat<C4_or_or, Or, Su<Or>>;
// op(Ps, op(Pt, !Pu))
defm: BoolvAccRRR_pat<C4_and_andn, And, Su<VNot2<And, pnot>>>;
defm: BoolvAccRRR_pat<C4_and_orn, And, Su<VNot2< Or, pnot>>>;
defm: BoolvAccRRR_pat<C4_or_andn, Or, Su<VNot2<And, pnot>>>;
defm: BoolvAccRRR_pat<C4_or_orn, Or, Su<VNot2< Or, pnot>>>;
// --(5) Compare ---------------------------------------------------------
//
// Avoid negated comparisons, i.e. those of form "Pd = !cmp(...)".
// These cannot form compounds (e.g. J4_cmpeqi_tp0_jump_nt).
def: OpR_RI_pat<C2_cmpeqi, seteq, i1, I32, anyimm>;
def: OpR_RI_pat<C2_cmpgti, setgt, i1, I32, anyimm>;
def: OpR_RI_pat<C2_cmpgtui, setugt, i1, I32, anyimm>;
def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)),
(C2_cmpgti I32:$Rs, (SDEC1 imm:$s10))>;
def: Pat<(i1 (setuge I32:$Rs, u32_0ImmPred:$u9)),
(C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9))>;
def: Pat<(i1 (setlt I32:$Rs, s32_0ImmPred:$s10)),
(C2_not (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10)))>;
def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)),
(C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>;
// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
// that reverse the order of the operands.
class RevCmp<PatFrag F>
: PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode,
F.OperandTransform>;
def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>;
def: OpR_RR_pat<C2_cmpgt, setgt, i1, I32>;
def: OpR_RR_pat<C2_cmpgtu, setugt, i1, I32>;
def: OpR_RR_pat<C2_cmpgt, RevCmp<setlt>, i1, I32>;
def: OpR_RR_pat<C2_cmpgtu, RevCmp<setult>, i1, I32>;
def: OpR_RR_pat<C2_cmpeqp, seteq, i1, I64>;
def: OpR_RR_pat<C2_cmpgtp, setgt, i1, I64>;
def: OpR_RR_pat<C2_cmpgtup, setugt, i1, I64>;
def: OpR_RR_pat<C2_cmpgtp, RevCmp<setlt>, i1, I64>;
def: OpR_RR_pat<C2_cmpgtup, RevCmp<setult>, i1, I64>;
def: OpR_RR_pat<A2_vcmpbeq, seteq, i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbeq, seteq, v8i1, V8I8>;
def: OpR_RR_pat<A4_vcmpbgt, RevCmp<setlt>, i1, V8I8>;
def: OpR_RR_pat<A4_vcmpbgt, RevCmp<setlt>, v8i1, V8I8>;
def: OpR_RR_pat<A4_vcmpbgt, setgt, i1, V8I8>;
def: OpR_RR_pat<A4_vcmpbgt, setgt, v8i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu, RevCmp<setult>, i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu, RevCmp<setult>, v8i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu, setugt, i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu, setugt, v8i1, V8I8>;
def: OpR_RR_pat<A2_vcmpheq, seteq, i1, V4I16>;
def: OpR_RR_pat<A2_vcmpheq, seteq, v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgt, RevCmp<setlt>, i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgt, RevCmp<setlt>, v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgt, setgt, i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgt, setgt, v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgtu, RevCmp<setult>, i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgtu, RevCmp<setult>, v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgtu, setugt, i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgtu, setugt, v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmpweq, seteq, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpweq, seteq, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgt, RevCmp<setlt>, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgt, RevCmp<setlt>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgt, setgt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgt, setgt, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.
def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)),
(C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>;
def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)),
(C2_not (C2_cmpgti I32:$Rs, imm:$u5))>;
def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)),
(C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>;
class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
PatFrag RsPred, PatFrag RtPred = RsPred>
: Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
(Output RsPred:$Rs, RtPred:$Rt)>;
class Outn<InstHexagon MI>
: OutPatFrag<(ops node:$Rs, node:$Rt),
(C2_not (MI $Rs, $Rt))>;
def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>;
def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>;
def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>;
def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>;
def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>;
def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>;
def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>;
def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>;
def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>;
def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>;
def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>;
def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>;
def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>;
def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>;
def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>;
let AddedComplexity = 100 in {
def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)),
(A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)),
(C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 65535), 0)),
(A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 65535), 0)),
(C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
}
// PatFrag for AssertZext which takes the original type as a parameter.
def SDTAssertZext: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0,1>]>;
def AssertZextSD: SDNode<"ISD::AssertZext", SDTAssertZext>;
class AssertZext<ValueType T>: PatFrag<(ops node:$A), (AssertZextSD $A, T)>;
multiclass Cmpb_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt,
PatLeaf ImmPred, int Mask> {
def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)),
(MI I32:$Rs, imm:$I)>;
def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)),
(MI I32:$Rs, imm:$I)>;
}
multiclass CmpbN_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt,
PatLeaf ImmPred, int Mask> {
def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)),
(C2_not (MI I32:$Rs, imm:$I))>;
def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)),
(C2_not (MI I32:$Rs, imm:$I))>;
}
multiclass CmpbND_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt,
PatLeaf ImmPred, int Mask> {
def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)),
(C2_not (MI I32:$Rs, (UDEC1 imm:$I)))>;
def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)),
(C2_not (MI I32:$Rs, (UDEC1 imm:$I)))>;
}
let AddedComplexity = 200 in {
defm: Cmpb_pat <A4_cmpbeqi, seteq, AssertZext<i8>, IsUGT<8,31>, 255>;
defm: CmpbN_pat <A4_cmpbeqi, setne, AssertZext<i8>, IsUGT<8,31>, 255>;
defm: Cmpb_pat <A4_cmpbgtui, setugt, AssertZext<i8>, IsUGT<32,31>, 255>;
defm: CmpbN_pat <A4_cmpbgtui, setule, AssertZext<i8>, IsUGT<32,31>, 255>;
defm: Cmpb_pat <A4_cmphgtui, setugt, AssertZext<i16>, IsUGT<32,31>, 65535>;
defm: CmpbN_pat <A4_cmphgtui, setule, AssertZext<i16>, IsUGT<32,31>, 65535>;
defm: CmpbND_pat<A4_cmpbgtui, setult, AssertZext<i8>, IsUGT<32,32>, 255>;
defm: CmpbND_pat<A4_cmphgtui, setult, AssertZext<i16>, IsUGT<32,32>, 65535>;
}
def: Pat<(i32 (zext (i1 (seteq I32:$Rs, I32:$Rt)))),
(A4_rcmpeq I32:$Rs, I32:$Rt)>;
def: Pat<(i32 (zext (i1 (setne I32:$Rs, I32:$Rt)))),
(A4_rcmpneq I32:$Rs, I32:$Rt)>;
def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))),
(A4_rcmpeqi I32:$Rs, imm:$s8)>;
def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))),
(A4_rcmpneqi I32:$Rs, imm:$s8)>;
def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>;
def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>;
def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>;
def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
// Floating-point comparisons with checks for ordered/unordered status.
class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3>
: OutPatFrag<(ops node:$Rs, node:$Rt),
(MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>;
class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>;
class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
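// Cmpuf<MI> ors the unordered bit into the compare result (setueq, setuge,
// setugt); Cmpufn<MI> uses or-not to negate the compare, giving setune.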
def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
// --(6) Select ----------------------------------------------------------
//
def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt),
(C2_mux I1:$Pu, I32:$Rs, I32:$Rt)>;
def: Pat<(select I1:$Pu, v4i8:$Rs, v4i8:$Rt),
(C2_mux I1:$Pu, v4i8:$Rs, v4i8:$Rt)>;
def: Pat<(select I1:$Pu, v2i16:$Rs, v2i16:$Rt),
(C2_mux I1:$Pu, v2i16:$Rs, v2i16:$Rt)>;
def: Pat<(select I1:$Pu, anyimm:$s8, I32:$Rs),
(C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
def: Pat<(select I1:$Pu, I32:$Rs, anyimm:$s8),
(C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
def: Pat<(select I1:$Pu, anyimm:$s8, s8_0ImmPred:$S8),
(C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;
def: Pat<(select (not I1:$Pu), I32:$Rs, I32:$Rt),
(C2_mux I1:$Pu, I32:$Rt, I32:$Rs)>;
def: Pat<(select (not I1:$Pu), s8_0ImmPred:$S8, anyimm:$s8),
(C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;
def: Pat<(select (not I1:$Pu), anyimm:$s8, I32:$Rs),
(C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
def: Pat<(select (not I1:$Pu), I32:$Rs, anyimm:$s8),
(C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes, so emulate with combines.
def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
def: Pat<(select I1:$Pu, v2i32:$Rs, v2i32:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
(C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
(C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
(C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
(C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
(C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
(C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
(C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt),
(C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt),
(C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt),
(C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
def: Pat<(vselect (pnot V8I1:$Pu), V8I8:$Rs, V8I8:$Rt),
(C2_vmux V8I1:$Pu, V8I8:$Rt, V8I8:$Rs)>;
def: Pat<(vselect (pnot V4I1:$Pu), V4I16:$Rs, V4I16:$Rt),
(C2_vmux V4I1:$Pu, V4I16:$Rt, V4I16:$Rs)>;
def: Pat<(vselect (pnot V2I1:$Pu), V2I32:$Rs, V2I32:$Rt),
(C2_vmux V2I1:$Pu, V2I32:$Rt, V2I32:$Rs)>;
// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw).
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw),
(C2_or (C2_and I1:$Pu, I1:$Pv),
(C2_andn I1:$Pw, I1:$Pu))>;
def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{
return isPositiveHalfWord(N);
}]>;
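// When both operands are known positive halfwords, the 32-bit min/max equals
// the 16-bit min/max, so the sext_inreg around the select can be dropped.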
multiclass SelMinMax16_pats<PatFrag CmpOp, InstHexagon InstA,
InstHexagon InstB> {
def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)),
IsPosHalf:$Rs, IsPosHalf:$Rt), i16),
(InstA IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)),
IsPosHalf:$Rt, IsPosHalf:$Rs), i16),
(InstB IntRegs:$Rs, IntRegs:$Rt)>;
}
let AddedComplexity = 200 in {
defm: SelMinMax16_pats<setge, A2_max, A2_min>;
defm: SelMinMax16_pats<setgt, A2_max, A2_min>;
defm: SelMinMax16_pats<setle, A2_min, A2_max>;
defm: SelMinMax16_pats<setlt, A2_min, A2_max>;
defm: SelMinMax16_pats<setuge, A2_maxu, A2_minu>;
defm: SelMinMax16_pats<setugt, A2_maxu, A2_minu>;
defm: SelMinMax16_pats<setule, A2_minu, A2_maxu>;
defm: SelMinMax16_pats<setult, A2_minu, A2_maxu>;
}
def: OpR_RR_pat<A2_min, Smin, i32, I32, I32>;
def: OpR_RR_pat<A2_max, Smax, i32, I32, I32>;
def: OpR_RR_pat<A2_minu, Umin, i32, I32, I32>;
def: OpR_RR_pat<A2_maxu, Umax, i32, I32, I32>;
def: OpR_RR_pat<A2_minp, Smin, i64, I64, I64>;
def: OpR_RR_pat<A2_maxp, Smax, i64, I64, I64>;
def: OpR_RR_pat<A2_minup, Umin, i64, I64, I64>;
def: OpR_RR_pat<A2_maxup, Umax, i64, I64, I64>;
let AddedComplexity = 100 in {
defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setogt, i1, F32>;
defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setoge, i1, F32>;
defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setolt, i1, F32>;
defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setole, i1, F32>;
}
let AddedComplexity = 100, Predicates = [HasV67] in {
defm: MinMax_pats<F2_dfmin, F2_dfmax, select, setogt, i1, F64>;
defm: MinMax_pats<F2_dfmin, F2_dfmax, select, setoge, i1, F64>;
defm: MinMax_pats<F2_dfmax, F2_dfmin, select, setolt, i1, F64>;
defm: MinMax_pats<F2_dfmax, F2_dfmin, select, setole, i1, F64>;
}
def: OpR_RR_pat<A2_vminb, Smin, v8i8, V8I8>;
def: OpR_RR_pat<A2_vmaxb, Smax, v8i8, V8I8>;
def: OpR_RR_pat<A2_vminub, Umin, v8i8, V8I8>;
def: OpR_RR_pat<A2_vmaxub, Umax, v8i8, V8I8>;
def: OpR_RR_pat<A2_vminh, Smin, v4i16, V4I16>;
def: OpR_RR_pat<A2_vmaxh, Smax, v4i16, V4I16>;
def: OpR_RR_pat<A2_vminuh, Umin, v4i16, V4I16>;
def: OpR_RR_pat<A2_vmaxuh, Umax, v4i16, V4I16>;
def: OpR_RR_pat<A2_vminw, Smin, v2i32, V2I32>;
def: OpR_RR_pat<A2_vmaxw, Smax, v2i32, V2I32>;
def: OpR_RR_pat<A2_vminuw, Umin, v2i32, V2I32>;
def: OpR_RR_pat<A2_vmaxuw, Umax, v2i32, V2I32>;
// --(7) Insert/extract --------------------------------------------------
//
def SDTHexagonINSERT:
SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
let AddedComplexity = 10 in {
def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
(S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>;
def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
(S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>;
}
def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, I32:$Width, I32:$Off),
(S2_insert_rp I32:$Rs, I32:$Rt, (Combinew $Width, $Off))>;
def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, I32:$Width, I32:$Off),
(S2_insertp_rp I64:$Rs, I64:$Rt, (Combinew $Width, $Off))>;
def SDTHexagonEXTRACTU
: SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
let AddedComplexity = 10 in {
def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5),
(S2_extractu I32:$Rs, imm:$u5, imm:$U5)>;
def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6),
(S2_extractup I64:$Rs, imm:$u6, imm:$U6)>;
}
def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off),
(S2_extractu_rp I32:$Rs, (Combinew $Width, $Off))>;
def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off),
(S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>;
def: Pat<(v4i8 (splat_vector anyint:$V)), (ToI32 (SplatB $V))>;
def: Pat<(v2i16 (splat_vector anyint:$V)), (ToI32 (SplatH $V))>;
def: Pat<(v8i8 (splat_vector anyint:$V)),
(Combinew (ToI32 (SplatB $V)), (ToI32 (SplatB $V)))>;
def: Pat<(v4i16 (splat_vector anyint:$V)),
(Combinew (ToI32 (SplatH $V)), (ToI32 (SplatH $V)))>;
let AddedComplexity = 10 in
def: Pat<(v2i32 (splat_vector s8_0ImmPred:$s8)),
(A2_combineii imm:$s8, imm:$s8)>;
def: Pat<(v2i32 (splat_vector anyimm:$V)), (Combinew (ToI32 $V), (ToI32 $V))>;
def: Pat<(v4i8 (splat_vector I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
def: Pat<(v2i16 (splat_vector I32:$Rs)), (LoReg (S2_vsplatrh I32:$Rs))>;
def: Pat<(v4i16 (splat_vector I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
def: Pat<(v2i32 (splat_vector I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>;
let AddedComplexity = 10 in
def: Pat<(v8i8 (splat_vector I32:$Rs)), (S6_vsplatrbp I32:$Rs)>,
Requires<[HasV62]>;
def: Pat<(v8i8 (splat_vector I32:$Rs)),
(Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>;
let AddedComplexity = 10 in {
def: Pat<(sext_inreg (HexagonEXTRACTU I32:$Rs, 8, u5_0ImmPred:$U5), i8),
(S4_extract I32:$Rs, 8, imm:$U5)>;
def: Pat<(sext_inreg (HexagonEXTRACTU I32:$Rs, 16, u5_0ImmPred:$U5), i16),
(S4_extract I32:$Rs, 16, imm:$U5)>;
def: Pat<(sext_inreg (HexagonEXTRACTU I64:$Rs, 8, u6_0ImmPred:$U6), i8),
(S4_extractp I64:$Rs, 8, imm:$U6)>;
def: Pat<(sext_inreg (HexagonEXTRACTU I64:$Rs, 16, u6_0ImmPred:$U6), i16),
(S4_extractp I64:$Rs, 16, imm:$U6)>;
def: Pat<(sext_inreg (HexagonEXTRACTU I64:$Rs, 32, u6_0ImmPred:$U6), i32),
(S4_extractp I64:$Rs, 32, imm:$U6)>;
}
def: Pat<(sext_inreg (HexagonEXTRACTU I32:$Rs, 8, I32:$Off), i8),
(S4_extract_rp I32:$Rs, (Combinew (ToI32 8), I32:$Off))>;
def: Pat<(sext_inreg (HexagonEXTRACTU I32:$Rs, 16, I32:$Off), i16),
(S4_extract_rp I32:$Rs, (Combinew (ToI32 16), I32:$Off))>;
def: Pat<(sext_inreg (HexagonEXTRACTU I64:$Rs, 8, I32:$Off), i8),
(S4_extractp_rp I64:$Rs, (Combinew (ToI32 8), I32:$Off))>;
def: Pat<(sext_inreg (HexagonEXTRACTU I64:$Rs, 16, I32:$Off), i16),
(S4_extractp_rp I64:$Rs, (Combinew (ToI32 16), I32:$Off))>;
def: Pat<(sext_inreg (HexagonEXTRACTU I64:$Rs, 32, I32:$Off), i32),
(S4_extractp_rp I64:$Rs, (Combinew (ToI32 32), I32:$Off))>;
// --(8) Shift/permute ---------------------------------------------------
//
def SDTHexagonI64I32I32: SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>;
// The complexity of the combines involving immediates should be greater
// than the complexity of the combine with two registers.
let AddedComplexity = 50 in {
def: Pat<(HexagonCOMBINE I32:$Rs, anyimm:$s8),
(A4_combineri IntRegs:$Rs, imm:$s8)>;
def: Pat<(HexagonCOMBINE anyimm:$s8, I32:$Rs),
(A4_combineir imm:$s8, IntRegs:$Rs)>;
}
// The complexity of the combine with two immediates should be greater than
// the complexity of a combine involving a register.
let AddedComplexity = 75 in {
def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, anyimm:$u6),
(A4_combineii imm:$s8, imm:$u6)>;
def: Pat<(HexagonCOMBINE anyimm:$s8, s8_0ImmPred:$S8),
(A2_combineii imm:$s8, imm:$S8)>;
}
def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>;
def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)),
(A2_swiz (HiReg $Rss)))>;
+def: Pat<(bswap V2I16:$Rs), (A2_combine_lh (A2_swiz $Rs), (A2_swiz $Rs))>;
+def: Pat<(bswap V2I32:$Rs), (Combinew (A2_swiz (HiReg $Rs)),
+ (A2_swiz (LoReg $Rs)))>;
+def: Pat<(bswap V4I16:$Rs), (A2_orp (S2_lsr_i_vh $Rs, 8),
+ (S2_asl_i_vh $Rs, 8))>;
+
def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), (S4_lsli imm:$s6, I32:$Rt)>;
def: Pat<(shl I32:$Rs, (i32 16)), (A2_aslh I32:$Rs)>;
def: Pat<(sra I32:$Rs, (i32 16)), (A2_asrh I32:$Rs)>;
def: OpR_RI_pat<S2_asr_i_r, Sra, i32, I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_r, Srl, i32, I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_r, Shl, i32, I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_p, Sra, i64, I64, u6_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_p, Srl, i64, I64, u6_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_p, Shl, i64, I64, u6_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_vh, Sra, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_vh, Srl, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_vh, Shl, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_vh, Sra, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_vh, Srl, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_vh, Shl, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RR_pat<S2_asr_r_r, Sra, i32, I32, I32>;
def: OpR_RR_pat<S2_lsr_r_r, Srl, i32, I32, I32>;
def: OpR_RR_pat<S2_asl_r_r, Shl, i32, I32, I32>;
def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>;
def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;
// Funnel shifts.
def IsMul8_U3: PatLeaf<(i32 imm), [{
uint64_t V = N->getZExtValue();
return V % 8 == 0 && isUInt<3>(V / 8);
}]>;
def Divu8: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i32);
}]>;
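// IsMul8_U3 matches shift amounts that are a whole number of bytes (8*k with
// k < 8); Divu8 converts such a bit count into the byte count that
// S2_valignib expects.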
// Funnel shift-left.
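// fshl(Rs, Rt, S) is the high half of the 2W-bit value Rs:Rt shifted left by
// S, i.e. (Rs << S) | (Rt >>u (W-S)): the 32-bit fragments take the high word
// of a 64-bit shift of (Combinew Rs, Rt), the 64-bit ones use a shift-or.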
def FShl32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
(HiReg (S2_asl_i_p (Combinew $Rs, $Rt), $S))>;
def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
(HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>;
def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
(S2_lsr_i_p_or (S2_asl_i_p $Rs, $S), $Rt, (Subi<64> $S))>;
def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
(S2_lsr_r_p_or (S2_asl_r_p $Rs, $Ru), $Rt, (A2_subri 64, $Ru))>;
// Combined SDNodeXForm: (Divu8 (Subi<64> $S))
def Divu64_8: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((64 - N->getSExtValue()) / 8,
SDLoc(N), MVT::i32);
}]>;
// Special cases:
let AddedComplexity = 100 in {
def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)),
(A2_combine_lh I32:$Rs, I32:$Rt)>;
def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S),
(S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>;
}
let Predicates = [HasV60], AddedComplexity = 50 in {
def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
}
let AddedComplexity = 30 in {
def: Pat<(rotl I32:$Rs, u5_0ImmPred:$S), (FShl32i $Rs, $Rs, imm:$S)>;
def: Pat<(rotl I64:$Rs, u6_0ImmPred:$S), (FShl64i $Rs, $Rs, imm:$S)>;
def: Pat<(fshl I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShl32i $Rs, $Rt, imm:$S)>;
def: Pat<(fshl I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShl64i $Rs, $Rt, imm:$S)>;
}
def: Pat<(rotl I32:$Rs, I32:$Rt), (FShl32r $Rs, $Rs, $Rt)>;
def: Pat<(rotl I64:$Rs, I32:$Rt), (FShl64r $Rs, $Rs, $Rt)>;
def: Pat<(fshl I32:$Rs, I32:$Rt, I32:$Ru), (FShl32r $Rs, $Rt, $Ru)>;
def: Pat<(fshl I64:$Rs, I64:$Rt, I32:$Ru), (FShl64r $Rs, $Rt, $Ru)>;
// Funnel shift-right.
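// fshr(Rs, Rt, S) is the low half of Rs:Rt shifted right by S, i.e.
// (Rt >>u S) | (Rs << (W-S)).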
def FShr32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
(LoReg (S2_lsr_i_p (Combinew $Rs, $Rt), $S))>;
def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
(LoReg (S2_lsr_r_p (Combinew $Rs, $Rt), $Ru))>;
def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
(S2_asl_i_p_or (S2_lsr_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
(S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
// Special cases:
let AddedComplexity = 100 in {
def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)),
(A2_combine_lh I32:$Rs, I32:$Rt)>;
def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S),
(S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>;
}
let Predicates = [HasV60], AddedComplexity = 50 in {
def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (S6_rol_i_r I32:$Rs, (Subi<32> $S))>;
def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (S6_rol_i_p I64:$Rs, (Subi<64> $S))>;
}
let AddedComplexity = 30 in {
def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (FShr32i $Rs, $Rs, imm:$S)>;
def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (FShr64i $Rs, $Rs, imm:$S)>;
def: Pat<(fshr I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShr32i $Rs, $Rt, imm:$S)>;
def: Pat<(fshr I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShr64i $Rs, $Rt, imm:$S)>;
}
def: Pat<(rotr I32:$Rs, I32:$Rt), (FShr32r $Rs, $Rs, $Rt)>;
def: Pat<(rotr I64:$Rs, I32:$Rt), (FShr64r $Rs, $Rs, $Rt)>;
def: Pat<(fshr I32:$Rs, I32:$Rt, I32:$Ru), (FShr32r $Rs, $Rt, $Ru)>;
def: Pat<(fshr I64:$Rs, I64:$Rt, I32:$Ru), (FShr64r $Rs, $Rt, $Ru)>;
def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
(S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
(S2_asr_i_p_rnd I64:$Rs, imm:$u6)>;
// Prefer S2_addasl_rrri over S2_asl_i_r_acc.
let AddedComplexity = 120 in
def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)),
(S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;
let AddedComplexity = 100 in {
def: AccRRI_pat<S2_asr_i_r_acc, Add, Su<Sra>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asr_i_r_nac, Sub, Su<Sra>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asr_i_r_and, And, Su<Sra>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asr_i_r_or, Or, Su<Sra>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asr_i_p_acc, Add, Su<Sra>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asr_i_p_nac, Sub, Su<Sra>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asr_i_p_and, And, Su<Sra>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asr_i_p_or, Or, Su<Sra>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_r_acc, Add, Su<Srl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_r_nac, Sub, Su<Srl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_r_and, And, Su<Srl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_r_or, Or, Su<Srl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_r_xacc, Xor, Su<Srl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_p_acc, Add, Su<Srl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_p_nac, Sub, Su<Srl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_p_and, And, Su<Srl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_p_or, Or, Su<Srl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_lsr_i_p_xacc, Xor, Su<Srl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_r_acc, Add, Su<Shl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asl_i_r_nac, Sub, Su<Shl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asl_i_r_and, And, Su<Shl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asl_i_r_or, Or, Su<Shl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asl_i_r_xacc, Xor, Su<Shl>, I32, u5_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_acc, Add, Su<Shl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_nac, Sub, Su<Shl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>;
let Predicates = [HasV60] in {
def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>;
def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>;
def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>;
def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>;
def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>;
def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>;
def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>;
def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>;
def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>;
def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>;
}
}
let AddedComplexity = 100 in {
def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>;
def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>;
def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>;
def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>;
def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>;
def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>;
def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>;
def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I64, I32>;
def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>;
def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>;
def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>;
def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>;
def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>;
def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>;
def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>;
def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>;
def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>;
def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>;
def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>;
def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>;
def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>;
def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>;
def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>;
def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>;
def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>;
def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>;
def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>;
}
class OpshIRI_pat<InstHexagon MI, PatFrag Op, PatFrag ShOp,
PatFrag RegPred, PatFrag ImmPred>
: Pat<(Op anyimm:$u8, (ShOp RegPred:$Rs, ImmPred:$U5)),
(MI anyimm:$u8, RegPred:$Rs, imm:$U5)>;
let AddedComplexity = 200, Predicates = [UseCompound] in {
def: OpshIRI_pat<S4_addi_asl_ri, Add, Su<Shl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_addi_lsr_ri, Add, Su<Srl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_subi_asl_ri, Sub, Su<Shl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_subi_lsr_ri, Sub, Su<Srl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_andi_asl_ri, And, Su<Shl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_andi_lsr_ri, And, Su<Srl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_ori_asl_ri, Or, Su<Shl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_ori_lsr_ri, Or, Su<Srl>, I32, u5_0ImmPred>;
}
// Prefer this pattern to S2_asl_i_p_or for the special case of joining
// two 32-bit words into a 64-bit word.
let AddedComplexity = 200 in
def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)),
(Combinew I32:$a, I32:$b)>;
def: Pat<(or (or (or (shl (Zext64 (and I32:$b, (i32 65535))), (i32 16)),
(Zext64 (and I32:$a, (i32 65535)))),
(shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))),
(shl (Aext64 I32:$d), (i32 48))),
(Combinew (A2_combine_ll I32:$d, I32:$c),
(A2_combine_ll I32:$b, I32:$a))>;
let AddedComplexity = 200 in {
def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))),
(A2_combine_ll I32:$Rt, I32:$Rs)>;
def: Pat<(or (shl I32:$Rt, (i32 16)), (srl I32:$Rs, (i32 16))),
(A2_combine_lh I32:$Rt, I32:$Rs)>;
def: Pat<(or (and I32:$Rt, (i32 268431360)), (and I32:$Rs, (i32 65535))),
(A2_combine_hl I32:$Rt, I32:$Rs)>;
def: Pat<(or (and I32:$Rt, (i32 268431360)), (srl I32:$Rs, (i32 16))),
(A2_combine_hh I32:$Rt, I32:$Rs)>;
}
def SDTHexagonVShift
: SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>;
def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>;
def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>;
def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>;
// Funnel shifts with the shift amount modulo the element bit width.
def HexagonMFSHL: SDNode<"HexagonISD::MFSHL", SDTIntShiftDOp>;
def HexagonMFSHR: SDNode<"HexagonISD::MFSHR", SDTIntShiftDOp>;
def: OpR_RI_pat<S2_asl_i_vw, pf2<HexagonVASL>, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_vh, pf2<HexagonVASL>, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_vw, pf2<HexagonVASR>, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_vh, pf2<HexagonVASR>, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_vw, pf2<HexagonVLSR>, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_vh, pf2<HexagonVLSR>, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RR_pat<S2_asl_r_vw, pf2<HexagonVASL>, v2i32, V2I32, I32>;
def: OpR_RR_pat<S2_asl_r_vh, pf2<HexagonVASL>, v4i16, V4I16, I32>;
def: OpR_RR_pat<S2_asr_r_vw, pf2<HexagonVASR>, v2i32, V2I32, I32>;
def: OpR_RR_pat<S2_asr_r_vh, pf2<HexagonVASR>, v4i16, V4I16, I32>;
def: OpR_RR_pat<S2_lsr_r_vw, pf2<HexagonVLSR>, v2i32, V2I32, I32>;
def: OpR_RR_pat<S2_lsr_r_vh, pf2<HexagonVLSR>, v4i16, V4I16, I32>;
def: Pat<(sra V2I32:$b, (v2i32 (splat_vector u5_0ImmPred:$c))),
(S2_asr_i_vw V2I32:$b, imm:$c)>;
def: Pat<(srl V2I32:$b, (v2i32 (splat_vector u5_0ImmPred:$c))),
(S2_lsr_i_vw V2I32:$b, imm:$c)>;
def: Pat<(shl V2I32:$b, (v2i32 (splat_vector u5_0ImmPred:$c))),
(S2_asl_i_vw V2I32:$b, imm:$c)>;
def: Pat<(sra V4I16:$b, (v4i16 (splat_vector u4_0ImmPred:$c))),
(S2_asr_i_vh V4I16:$b, imm:$c)>;
def: Pat<(srl V4I16:$b, (v4i16 (splat_vector u4_0ImmPred:$c))),
(S2_lsr_i_vh V4I16:$b, imm:$c)>;
def: Pat<(shl V4I16:$b, (v4i16 (splat_vector u4_0ImmPred:$c))),
(S2_asl_i_vh V4I16:$b, imm:$c)>;
def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S),
(LoReg (S2_asr_i_vh (ToAext64 $Rs), imm:$S))>;
def: Pat<(HexagonVASL V2I16:$Rs, u4_0ImmPred:$S),
(LoReg (S2_asl_i_vh (ToAext64 $Rs), imm:$S))>;
def: Pat<(HexagonVLSR V2I16:$Rs, u4_0ImmPred:$S),
(LoReg (S2_lsr_i_vh (ToAext64 $Rs), imm:$S))>;
def: Pat<(HexagonVASR V2I16:$Rs, I32:$Rt),
(LoReg (S2_asr_i_vh (ToAext64 $Rs), I32:$Rt))>;
def: Pat<(HexagonVASL V2I16:$Rs, I32:$Rt),
(LoReg (S2_asl_i_vh (ToAext64 $Rs), I32:$Rt))>;
def: Pat<(HexagonVLSR V2I16:$Rs, I32:$Rt),
(LoReg (S2_lsr_i_vh (ToAext64 $Rs), I32:$Rt))>;
// --(9) Arithmetic/bitwise ----------------------------------------------
//
def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>;
def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>;
def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
def: Pat<(fabs F64:$Rs),
(Combinew (S2_clrbit_i (HiReg $Rs), 31),
(i32 (LoReg $Rs)))>;
def: Pat<(fneg F64:$Rs),
(Combinew (S2_togglebit_i (HiReg $Rs), 31),
(i32 (LoReg $Rs)))>;
def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>;
def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>;
def: OpR_RR_pat<A2_add, Add, i32, I32>;
def: OpR_RR_pat<A2_sub, Sub, i32, I32>;
def: OpR_RR_pat<A2_and, And, i32, I32>;
def: OpR_RR_pat<A2_or, Or, i32, I32>;
def: OpR_RR_pat<A2_xor, Xor, i32, I32>;
def: OpR_RR_pat<A2_addp, Add, i64, I64>;
def: OpR_RR_pat<A2_subp, Sub, i64, I64>;
def: OpR_RR_pat<A2_andp, And, i64, I64>;
def: OpR_RR_pat<A2_orp, Or, i64, I64>;
def: OpR_RR_pat<A2_xorp, Xor, i64, I64>;
def: OpR_RR_pat<A4_andnp, Not2<And>, i64, I64>;
def: OpR_RR_pat<A4_ornp, Not2<Or>, i64, I64>;
def: OpR_RR_pat<A2_svaddh, Add, v2i16, V2I16>;
def: OpR_RR_pat<A2_svsubh, Sub, v2i16, V2I16>;
def: OpR_RR_pat<A2_vaddub, Add, v8i8, V8I8>;
def: OpR_RR_pat<A2_vaddh, Add, v4i16, V4I16>;
def: OpR_RR_pat<A2_vaddw, Add, v2i32, V2I32>;
def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>;
def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>;
def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>;
def: OpR_RR_pat<A2_and, And, v4i8, V4I8>;
def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>;
def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>;
def: OpR_RR_pat<A2_and, And, v2i16, V2I16>;
def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>;
def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>;
def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>;
def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>;
def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>;
def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>;
def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>;
def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>;
def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>;
def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>;
def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>;
def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>;
def: OpR_RR_pat<M2_mpy_up, pf2<mulhs>, i32, I32>;
def: OpR_RR_pat<M2_mpyu_up, pf2<mulhu>, i32, I32>;
def: OpR_RI_pat<M2_mpysip, Mul, i32, I32, u32_0ImmPred>;
def: OpR_RI_pat<M2_mpysmi, Mul, i32, I32, s32_0ImmPred>;
// Arithmetic on predicates.
def: OpR_RR_pat<C2_xor, Add, i1, I1>;
def: OpR_RR_pat<C2_xor, Add, v2i1, V2I1>;
def: OpR_RR_pat<C2_xor, Add, v4i1, V4I1>;
def: OpR_RR_pat<C2_xor, Add, v8i1, V8I1>;
def: OpR_RR_pat<C2_xor, Sub, i1, I1>;
def: OpR_RR_pat<C2_xor, Sub, v2i1, V2I1>;
def: OpR_RR_pat<C2_xor, Sub, v4i1, V4I1>;
def: OpR_RR_pat<C2_xor, Sub, v8i1, V8I1>;
def: OpR_RR_pat<C2_and, Mul, i1, I1>;
def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>;
def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
let Predicates = [HasV66] in {
def: OpR_RR_pat<F2_dfadd, pf2<fadd>, f64, F64>;
def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>;
}
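// DfMpy expands fmul f64 into the partial-product sequence dfmpyll, two
// dfmpylh accumulations, and dfmpyhh. The HasV67 pattern further below first
// conditions the operands with F2_dfmpyfix; the unsafe-math variant applies
// the sequence to the raw operands.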
def DfMpy: OutPatFrag<(ops node:$Rs, node:$Rt),
(F2_dfmpyhh
(F2_dfmpylh
(F2_dfmpylh
(F2_dfmpyll $Rs, $Rt),
$Rs, $Rt),
$Rt, $Rs),
$Rs, $Rt)>;
let Predicates = [HasV67,UseUnsafeMath], AddedComplexity = 50 in {
def: Pat<(fmul F64:$Rs, F64:$Rt), (DfMpy $Rs, $Rt)>;
}
let Predicates = [HasV67] in {
def: OpR_RR_pat<F2_dfmin, pf2<fminnum>, f64, F64>;
def: OpR_RR_pat<F2_dfmax, pf2<fmaxnum>, f64, F64>;
def: Pat<(fmul F64:$Rs, F64:$Rt), (DfMpy (F2_dfmpyfix $Rs, $Rt),
(F2_dfmpyfix $Rt, $Rs))>;
}
// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add
// over add-add with individual multiplies as inputs.
let AddedComplexity = 10 in {
def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>;
let Predicates = [HasV66] in
def: AccRRR_pat<M2_mnaci, Sub, Su<Mul>, I32, I32, I32>;
}
def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>;
def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>;
def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>;
// Mulh for vectors
//
def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)),
(Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)),
(M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>;
def: Pat<(v2i32 (mulhs V2I32:$Rss, V2I32:$Rtt)),
(Combinew (M2_mpy_up (HiReg $Rss), (HiReg $Rtt)),
(M2_mpy_up (LoReg $Rss), (LoReg $Rtt)))>;
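// Mulhub4: M5_vmpybuu forms the four 16-bit unsigned byte products, and
// S2_vtrunohb keeps the high (odd) byte of each, i.e. the per-lane mulhu.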
def Mulhub4:
OutPatFrag<(ops node:$Rs, node:$Rt), (S2_vtrunohb (M5_vmpybuu $Rs, $Rt))>;
def Mulhub8:
OutPatFrag<(ops node:$Rss, node:$Rtt),
(Combinew (Mulhub4 (HiReg $Rss), (HiReg $Rtt)),
(Mulhub4 (LoReg $Rss), (LoReg $Rtt)))>;
// (mux (x >= 0), 0, y)
def Negbytes8:
OutPatFrag<(ops node:$Rss, node:$Rtt),
(C2_vmux (A4_vcmpbgti $Rss, -1), (A2_tfrpi 0), $Rtt)>;
def: Pat<(v4i8 (mulhu V4I8:$Rs, V4I8:$Rt)), (Mulhub4 $Rs, $Rt)>;
def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)), (Mulhub8 $Rss, $Rtt)>;
// (Mulhs x, y) = (Mulhu x, y) - (x < 0 ? y : 0) - (y < 0 ? x : 0)
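// For example, with bytes x = 0xff (-1) and y = 0x02: mulhu(x,y) is the high
// byte of 0x01fe, i.e. 0x01; subtracting y (since x < 0) gives 0xff, the high
// byte of the signed product 0xfffe (-2).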
def Mulhsb8:
OutPatFrag<(ops node:$Rss, node:$Rtt),
(A2_vsubub (Mulhub8 $Rss, $Rtt),
(A2_vaddub (Negbytes8 $Rss, $Rtt),
(Negbytes8 $Rtt, $Rss)))>;
def: Pat<(v4i8 (mulhs V4I8:$Rs, V4I8:$Rt)),
(LoReg (Mulhsb8 (v8i8 (ToAext64 $Rs)), (v8i8 (ToAext64 $Rt))))>;
def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)), (Mulhsb8 $Rss, $Rtt)>;
// v2i16 *s v2i16 -> v2i32
def Muli16:
OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>;
def Mulhsh2:
OutPatFrag<(ops node:$Rs, node:$Rt),
(A2_combine_hh (HiReg (Muli16 $Rs, $Rt)),
(LoReg (Muli16 $Rs, $Rt)))>;
def Mulhsh4:
OutPatFrag<(ops node:$Rss, node:$Rtt),
(Combinew (Mulhsh2 (HiReg $Rss), (HiReg $Rtt)),
(Mulhsh2 (LoReg $Rss), (LoReg $Rtt)))>;
def: Pat<(v2i16 (mulhs V2I16:$Rs, V2I16:$Rt)), (Mulhsh2 $Rs, $Rt)>;
def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh4 $Rss, $Rtt)>;
def: Pat<(v2i16 (mulhu V2I16:$Rs, V2I16:$Rt)),
(A2_svaddh
(Mulhsh2 $Rs, $Rt),
(A2_svaddh (LoReg (A2_andp (Combinew $Rt, $Rs),
(S2_asr_i_vh (Combinew $Rs, $Rt), 15))),
(HiReg (A2_andp (Combinew $Rt, $Rs),
(S2_asr_i_vh (Combinew $Rs, $Rt), 15)))))>;
def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)),
(A2_vaddh
(Mulhsh4 $Rss, $Rtt),
(A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)),
(A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>;
def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)),
(M2_mpysin IntRegs:$Rs, imm:$u8)>;
def n8_0ImmPred: PatLeaf<(i32 imm), [{
int64_t V = N->getSExtValue();
return -255 <= V && V <= 0;
}]>;
// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
(M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>;
def: Pat<(add Sext64:$Rs, I64:$Rt),
(A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>;
def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>;
def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>;
def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>;
def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>;
def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>;
def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
// For dags like (or (and (not _), _), (shl _, _)) where the "or" with
// one argument matches the patterns below, and with the other argument
// matches S2_asl_r_r_or, etc, prefer the patterns below.
let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor.
def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>;
def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>;
def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>;
}
// S4_addaddi and S4_subaddi don't have tied operands, so give them
// a bit of preference.
let AddedComplexity = 30, Predicates = [UseCompound] in {
def: Pat<(add I32:$Rs, (Su<Add> I32:$Ru, anyimm:$s6)),
(S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;
def: Pat<(add anyimm:$s6, (Su<Add> I32:$Rs, I32:$Ru)),
(S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;
def: Pat<(add I32:$Rs, (Su<Sub> anyimm:$s6, I32:$Ru)),
(S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>;
def: Pat<(sub (Su<Add> I32:$Rs, anyimm:$s6), I32:$Ru),
(S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>;
def: Pat<(add (Su<Sub> I32:$Rs, I32:$Ru), anyimm:$s6),
(S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>;
}
let Predicates = [UseCompound] in
def: Pat<(or I32:$Ru, (Su<And> I32:$Rx, anyimm:$s10)),
(S4_or_andix IntRegs:$Ru, IntRegs:$Rx, imm:$s10)>;
def: Pat<(or I32:$Rx, (Su<And> I32:$Rs, anyimm:$s10)),
(S4_or_andi IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>;
def: Pat<(or I32:$Rx, (Su<Or> I32:$Rs, anyimm:$s10)),
(S4_or_ori IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>;
def: Pat<(i32 (trunc (sra (Su<Mul> Sext64:$Rs, Sext64:$Rt), (i32 32)))),
(M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
def: Pat<(i32 (trunc (srl (Su<Mul> Sext64:$Rs, Sext64:$Rt), (i32 32)))),
(M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
def: Pat<(mul (Zext64 I32:$Rs), (Zext64 I32:$Rt)),
(M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>;
def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)),
(M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>;
def: Pat<(mul Sext64:$Rs, Sext64:$Rt),
(M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
def: Pat<(add I64:$Rx, (Su<Mul> Sext64:$Rs, Sext64:$Rt)),
(M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
def: Pat<(sub I64:$Rx, (Su<Mul> Sext64:$Rs, Sext64:$Rt)),
(M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
def: Pat<(add I64:$Rx, (Su<Mul> (Aext64 I32:$Rs), (Aext64 I32:$Rt))),
(M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
def: Pat<(add I64:$Rx, (Su<Mul> (Zext64 I32:$Rs), (Zext64 I32:$Rt))),
(M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
def: Pat<(sub I64:$Rx, (Su<Mul> (Aext64 I32:$Rs), (Aext64 I32:$Rt))),
(M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
def: Pat<(sub I64:$Rx, (Su<Mul> (Zext64 I32:$Rs), (Zext64 I32:$Rt))),
(M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
// Add halfword.
def: Pat<(sext_inreg (add I32:$Rt, I32:$Rs), i16),
(A2_addh_l16_ll I32:$Rt, I32:$Rs)>;
def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)),
(A2_addh_l16_hl I32:$Rt, I32:$Rs)>;
def: Pat<(shl (add I32:$Rt, I32:$Rs), (i32 16)),
(A2_addh_h16_ll I32:$Rt, I32:$Rs)>;
// Subtract halfword.
def: Pat<(sext_inreg (sub I32:$Rt, I32:$Rs), i16),
(A2_subh_l16_ll I32:$Rt, I32:$Rs)>;
def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)),
(A2_addh_l16_hl I32:$Rt, I32:$Rs)>;
def: Pat<(shl (sub I32:$Rt, I32:$Rs), (i32 16)),
(A2_subh_h16_ll I32:$Rt, I32:$Rs)>;
def: Pat<(mul I64:$Rss, I64:$Rtt),
(Combinew
(M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))),
(LoReg $Rss),
(HiReg $Rtt)),
(LoReg $Rtt),
(HiReg $Rss)),
(i32 (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)))))>;
def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt),
(A2_addp
(M2_dpmpyuu_acc_s0
(S2_lsr_i_p
(A2_addp
(M2_dpmpyuu_acc_s0
(S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32),
(HiReg $Rss),
(LoReg $Rtt)),
(A4_combineir 0, (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))),
32),
(HiReg $Rss),
(HiReg $Rtt)),
(S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>;
// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>;
// Multiply 64-bit signed and use upper result.
//
// For two signed 64-bit integers A and B, let A' and B' denote A and B
// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the
// sign bit of A (and identically for B). With this notation, the signed
// product A*B can be written as:
// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B')
// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B'
// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A']
// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A']
// Clear the sign bit in a 64-bit register.
def ClearSign : OutPatFrag<(ops node:$Rss),
(Combinew (S2_clrbit_i (HiReg $Rss), 31), (i32 (LoReg $Rss)))>;
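// (S2_asr_i_p $Rss, 63) is all-ones exactly when the value is negative, so
// the A2_andp terms below compute s(A)B' and s(B)A'; subtracting their sum
// from the upper half MulHU removes 2^64*[s(A)B'+s(B)A'] as derived above.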
def : Pat <(mulhs I64:$Rss, I64:$Rtt),
(A2_subp
(MulHU $Rss, $Rtt),
(A2_addp
(A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
(A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
// Prefer these instructions over M2_macsip/M2_macsin: the macsi* instructions
// will put the immediate addend into a register, while these instructions will
// use it directly. Such a construct does not appear in the middle of a gep,
// where M2_macsip would be preferable.
let AddedComplexity = 20, Predicates = [UseCompound] in {
def: Pat<(add (Su<Mul> I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6),
(M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
def: Pat<(add (Su<Mul> I32:$Rs, I32:$Rt), anyimm:$u6),
(M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>;
}
// Keep these instructions less preferable to M2_macsip/M2_macsin.
let Predicates = [UseCompound] in {
def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, u6_2ImmPred:$u6_2)),
(M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>;
def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, anyimm:$u6)),
(M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>;
def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
(M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
}
def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
(F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
(F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
(PS_vmulw V2I32:$Rs, V2I32:$Rt)>;
def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
(PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>;
// Add/subtract two v4i8: Hexagon does not have an insn for this one, so
// we use the double add v8i8, and use only the low part of the result.
def: Pat<(add V4I8:$Rs, V4I8:$Rt),
(LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(sub V4I8:$Rs, V4I8:$Rt),
(LoReg (A2_vsubub (ToAext64 $Rs), (ToAext64 $Rt)))>;
// Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two
// half-words and saturates the result to a 32-bit value, but the
// saturation never actually happens here (it can only occur with scaling).
def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
(LoReg (S2_vtrunewh (IMPLICIT_DEF),
(M2_vmpy2s_s0 V2I16:$Rs, V2I16:$Rt)))>;
def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
(S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)),
(M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>;
// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
(S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>;
// Multiplies two v8i8 vectors.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
(Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
(S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>;
// --(10) Bit ------------------------------------------------------------
//
// Count leading zeros.
def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
// Count trailing zeros.
def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
// Count leading ones.
def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
// Count trailing ones.
def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
// Define leading/trailing patterns that require zero-extensions to 64 bits.
def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>;
def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>;
def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>;
def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>;
def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>;
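// There is no 32-bit popcount: zero-extend with (A4_combineir 0, Rs) and use
// the 64-bit S5_popcountp.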
def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>;
def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>;
def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>;
+def: Pat<(bitreverse V4I8:$Rs), (A2_swiz (S2_brev $Rs))>;
+def: Pat<(bitreverse V8I8:$Rs), (Combinew (A2_swiz (LoReg (S2_brevp $Rs))),
+ (A2_swiz (HiReg (S2_brevp $Rs))))>;
+def: Pat<(bitreverse V2I16:$Rs), (A2_combine_lh (S2_brev $Rs),
+ (S2_brev $Rs))>;
+def: Pat<(bitreverse V4I16:$Rs),
+ (Combinew (A2_combine_lh (LoReg (S2_brevp $Rs)),
+ (LoReg (S2_brevp $Rs))),
+ (A2_combine_lh (HiReg (S2_brevp $Rs)),
+ (HiReg (S2_brevp $Rs))))>;
+def: Pat<(bitreverse V2I32:$Rs),
+ (Combinew (i32 (LoReg (S2_brevp $Rs))),
+ (i32 (HiReg (S2_brevp $Rs))))>;
+
let AddedComplexity = 20 in { // Complexity greater than and/or/xor
def: Pat<(and I32:$Rs, IsNPow2_32:$V),
(S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;
def: Pat<(or I32:$Rs, IsPow2_32:$V),
(S2_setbit_i IntRegs:$Rs, (Log2_32 $V))>;
def: Pat<(xor I32:$Rs, IsPow2_32:$V),
(S2_togglebit_i IntRegs:$Rs, (Log2_32 $V))>;
def: Pat<(and I32:$Rs, (not (shl 1, I32:$Rt))),
(S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(or I32:$Rs, (shl 1, I32:$Rt)),
(S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(xor I32:$Rs, (shl 1, I32:$Rt)),
(S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
}
// Clr/set/toggle bit for 64-bit values with immediate bit index.
let AddedComplexity = 20 in { // Complexity greater than and/or/xor
def: Pat<(and I64:$Rss, IsNPow2_64L:$V),
(Combinew (i32 (HiReg $Rss)),
(S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)))>;
def: Pat<(and I64:$Rss, IsNPow2_64H:$V),
(Combinew (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))),
(i32 (LoReg $Rss)))>;
def: Pat<(or I64:$Rss, IsPow2_64L:$V),
(Combinew (i32 (HiReg $Rss)),
(S2_setbit_i (LoReg $Rss), (Log2_64 $V)))>;
def: Pat<(or I64:$Rss, IsPow2_64H:$V),
(Combinew (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))),
(i32 (LoReg $Rss)))>;
def: Pat<(xor I64:$Rss, IsPow2_64L:$V),
(Combinew (i32 (HiReg $Rss)),
(S2_togglebit_i (LoReg $Rss), (Log2_64 $V)))>;
def: Pat<(xor I64:$Rss, IsPow2_64H:$V),
(Combinew (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))),
(i32 (LoReg $Rss)))>;
}
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
(S2_tstbit_i IntRegs:$Rs, imm:$u5)>;
def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)),
(S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(i1 (trunc I32:$Rs)),
(S2_tstbit_i IntRegs:$Rs, 0)>;
def: Pat<(i1 (trunc I64:$Rs)),
(S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
}
def: Pat<(and (srl I32:$Rs, u5_0ImmPred:$u5), 1),
(I1toI32 (S2_tstbit_i I32:$Rs, imm:$u5))>;
def: Pat<(and (srl I64:$Rss, IsULE<32,31>:$u6), 1),
(ToZext64 (I1toI32 (S2_tstbit_i (LoReg $Rss), imm:$u6)))>;
def: Pat<(and (srl I64:$Rss, IsUGT<32,31>:$u6), 1),
(ToZext64 (I1toI32 (S2_tstbit_i (HiReg $Rss), (UDEC32 $u6))))>;
def: Pat<(and (not (srl I32:$Rs, u5_0ImmPred:$u5)), 1),
(I1toI32 (S4_ntstbit_i I32:$Rs, imm:$u5))>;
def: Pat<(and (not (srl I64:$Rss, IsULE<32,31>:$u6)), 1),
(ToZext64 (I1toI32 (S4_ntstbit_i (LoReg $Rss), imm:$u6)))>;
def: Pat<(and (not (srl I64:$Rss, IsUGT<32,31>:$u6)), 1),
(ToZext64 (I1toI32 (S4_ntstbit_i (HiReg $Rss), (UDEC32 $u6))))>;
let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)),
(C2_bitsclri IntRegs:$Rs, imm:$u6)>;
def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)),
(C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
}
let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
(C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
def SDTTestBit:
SDTypeProfile<1, 2, [SDTCisVT<0, i1>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
def HexagonTSTBIT: SDNode<"HexagonISD::TSTBIT", SDTTestBit>;
def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5),
(S2_tstbit_i I32:$Rs, imm:$u5)>;
def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt),
(S2_tstbit_r I32:$Rs, I32:$Rt)>;
// Add extra complexity to prefer these instructions over bitsset/bitsclr.
// The reason is that tstbit/ntstbit can be folded into a compound instruction:
// if ([!]tstbit(...)) jump ...
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)),
(S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
def: Pat<(i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)),
(S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)),
(S4_ntstbit_r I32:$Rs, I32:$Rt)>;
def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)),
(S2_tstbit_r I32:$Rs, I32:$Rt)>;
}
def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64L:$u6), 0)),
(S4_ntstbit_i (LoReg $Rs), (Log2_64 $u6))>;
def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64H:$u6), 0)),
(S4_ntstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>;
def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64L:$u6), 0)),
(S2_tstbit_i (LoReg $Rs), (Log2_64 imm:$u6))>;
def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64H:$u6), 0)),
(S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 imm:$u6))))>;
// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
// represented as a compare against "value & 0xFF", which is an exact match
// for cmpb (same for cmph). The patterns below do not contain any additional
// complexity that would make them preferable, and if they were actually used
// instead of cmpb/cmph, they would result in a compare against a register that
// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)),
(C4_nbitsclri I32:$Rs, imm:$u6)>;
def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
(C4_nbitsclr I32:$Rs, I32:$Rt)>;
def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
(C4_nbitsset I32:$Rs, I32:$Rt)>;
// Special patterns to address certain cases where the "top-down" matching
// algorithm would cause suboptimal selection.
let AddedComplexity = 100 in {
// Avoid A4_rcmp[n]eqi in these cases:
def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
(I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
(I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
def: Pat<(i32 (zext (i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)))),
(I1toI32 (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5)))>;
def: Pat<(i32 (zext (i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)))),
(I1toI32 (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5)))>;
def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
(I1toI32 (S4_ntstbit_r I32:$Rs, I32:$Rt))>;
def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
(I1toI32 (S2_tstbit_r I32:$Rs, I32:$Rt))>;
}
// --(11) PIC ------------------------------------------------------------
//
def SDT_HexagonAtGot
: SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
def SDT_HexagonAtPcrel
: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
// AT_GOT address-of-GOT, address-of-global, offset-in-global
def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
// AT_PCREL address-of-global
def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
(L2_loadri_io I32:$got, imm:$addr)>;
def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
(A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
def: Pat<(HexagonAtPcrel I32:$addr),
(C4_addipc imm:$addr)>;
// The HVX load patterns also match AT_PCREL directly. Make sure that
// if the selection of this opcode changes, it's updated in all places.
// --(12) Load -----------------------------------------------------------
//
def L1toI32: OutPatFrag<(ops node:$Rs), (A2_subri 0, (i32 $Rs))>;
def L1toI64: OutPatFrag<(ops node:$Rs), (ToSext64 (L1toI32 $Rs))>;
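// L1toI32 maps a loaded 0/1 byte to 0/-1 (0 - Rs), i.e. a sign-extended i1;
// L1toI64 widens that result to 64 bits.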
def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8;
}]>;
def extloadv4i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8;
}]>;
def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8;
}]>;
def zextloadv4i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8;
}]>;
def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8;
}]>;
def sextloadv4i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8;
}]>;
// Patterns to select load-indexed: Rs + Off.
// - frameindex [+ imm],
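// (IsOrAdd matches an "or" whose operands have no common bits set, which is
// equivalent to an add; frame-index offset computations often take this form.)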
multiclass Loadxfi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
InstHexagon MI> {
def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
(VT (MI AddrFI:$fi, imm:$Off))>;
def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
(VT (MI AddrFI:$fi, imm:$Off))>;
def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
}
// Patterns to select load-indexed: Rs + Off.
// - base reg [+ imm]
multiclass Loadxgi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
InstHexagon MI> {
def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))),
(VT (MI IntRegs:$Rs, imm:$Off))>;
def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))),
(VT (MI IntRegs:$Rs, imm:$Off))>;
def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>;
}
// Patterns to select load-indexed: Rs + Off. Combines Loadxfi + Loadxgi.
multiclass Loadxi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
InstHexagon MI> {
defm: Loadxfi_pat<Load, VT, ImmPred, MI>;
defm: Loadxgi_pat<Load, VT, ImmPred, MI>;
}
// Patterns to select load reg indexed: Rs + Off with a value modifier.
// - frameindex [+ imm]
multiclass Loadxfim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
PatLeaf ImmPred, InstHexagon MI> {
def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
(VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
(VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
def: Pat<(VT (Load AddrFI:$fi)), (VT (ValueMod (MI AddrFI:$fi, 0)))>;
}
// Patterns to select load reg indexed: Rs + Off with a value modifier.
// - base reg [+ imm]
multiclass Loadxgim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
PatLeaf ImmPred, InstHexagon MI> {
def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))),
(VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))),
(VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
def: Pat<(VT (Load I32:$Rs)), (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
}
// Patterns to select load reg indexed: Rs + Off with a value modifier.
// Combines Loadxfim + Loadxgim.
multiclass Loadxim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
PatLeaf ImmPred, InstHexagon MI> {
defm: Loadxfim_pat<Load, VT, ValueMod, ImmPred, MI>;
defm: Loadxgim_pat<Load, VT, ValueMod, ImmPred, MI>;
}
// Pattern to select load reg reg-indexed: Rs + Rt<<u2.
class Loadxr_shl_pat<PatFrag Load, ValueType VT, InstHexagon MI>
: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
(VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
// Pattern to select load reg reg-indexed: Rs + Rt<<0.
class Loadxr_add_pat<PatFrag Load, ValueType VT, InstHexagon MI>
: Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
(VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
// Pattern to select load reg reg-indexed: Rs + Rt<<u2 with value modifier.
class Loadxrm_shl_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
InstHexagon MI>
: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
(VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>;
// Pattern to select load reg reg-indexed: Rs + Rt<<0 with value modifier.
class Loadxrm_add_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
InstHexagon MI>
: Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
(VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>;
// Pattern to select load long-offset reg-indexed: Addr + Rt<<u2.
// Don't match for u2==0; use reg+imm for those cases instead.
class Loadxu_pat<PatFrag Load, ValueType VT, PatFrag ImmPred, InstHexagon MI>
: Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))),
(VT (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr))>;
class Loadxum_pat<PatFrag Load, ValueType VT, PatFrag ImmPred, PatFrag ValueMod,
InstHexagon MI>
: Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))),
(VT (ValueMod (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr)))>;
// Pattern to select load absolute.
class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
: Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
// Pattern to select load absolute with value modifier.
class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
InstHexagon MI>
: Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
let AddedComplexity = 20 in {
defm: Loadxi_pat<extloadi1, i32, anyimm0, L2_loadrub_io>;
defm: Loadxi_pat<extloadi8, i32, anyimm0, L2_loadrub_io>;
defm: Loadxi_pat<extloadi16, i32, anyimm1, L2_loadruh_io>;
defm: Loadxi_pat<extloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>;
defm: Loadxi_pat<extloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>;
defm: Loadxi_pat<sextloadi8, i32, anyimm0, L2_loadrb_io>;
defm: Loadxi_pat<sextloadi16, i32, anyimm1, L2_loadrh_io>;
defm: Loadxi_pat<sextloadv2i8, v2i16, anyimm1, L2_loadbsw2_io>;
defm: Loadxi_pat<sextloadv4i8, v4i16, anyimm2, L2_loadbsw4_io>;
defm: Loadxi_pat<zextloadi1, i32, anyimm0, L2_loadrub_io>;
defm: Loadxi_pat<zextloadi8, i32, anyimm0, L2_loadrub_io>;
defm: Loadxi_pat<zextloadi16, i32, anyimm1, L2_loadruh_io>;
defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>;
defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>;
defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>;
defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>;
defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>;
defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>;
defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>;
// No sextloadi1.
defm: Loadxi_pat<atomic_load_8 , i32, anyimm0, L2_loadrub_io>;
defm: Loadxi_pat<atomic_load_16, i32, anyimm1, L2_loadruh_io>;
defm: Loadxi_pat<atomic_load_32, i32, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>;
}
let AddedComplexity = 30 in {
// Loads of i1 are loading a byte, and the byte should be either 0 or 1.
// It doesn't matter if the load itself sign- or zero-extends, so use the
// zero-extending byte load everywhere and convert the result as needed.
defm: Loadxim_pat<sextloadi1, i32, L1toI32, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<sextloadi1, i64, L1toI64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>;
defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>;
defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>;
defm: Loadxim_pat<sextloadi16, i64, ToSext64, anyimm1, L2_loadrh_io>;
defm: Loadxim_pat<sextloadi32, i64, ToSext64, anyimm2, L2_loadri_io>;
}
let AddedComplexity = 60 in {
def: Loadxu_pat<extloadi1, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<extloadi8, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<extloadi16, i32, anyimm1, L4_loadruh_ur>;
def: Loadxu_pat<extloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
def: Loadxu_pat<extloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
def: Loadxu_pat<sextloadi8, i32, anyimm0, L4_loadrb_ur>;
def: Loadxu_pat<sextloadi16, i32, anyimm1, L4_loadrh_ur>;
def: Loadxu_pat<sextloadv2i8, v2i16, anyimm1, L4_loadbsw2_ur>;
def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbsw4_ur>;
def: Loadxu_pat<zextloadi1, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<zextloadi8, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>;
def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>;
def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>;
def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>;
def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>;
def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>;
def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>;
def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>;
def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
def: Loadxum_pat<sextloadi1, i32, anyimm0, L1toI32, L4_loadrub_ur>;
def: Loadxum_pat<extloadi1, i64, anyimm0, ToAext64, L4_loadrub_ur>;
def: Loadxum_pat<sextloadi1, i64, anyimm0, L1toI64, L4_loadrub_ur>;
def: Loadxum_pat<zextloadi1, i64, anyimm0, ToZext64, L4_loadrub_ur>;
def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>;
def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>;
def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
def: Loadxum_pat<extloadi16, i64, anyimm1, ToAext64, L4_loadruh_ur>;
def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>;
def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
def: Loadxum_pat<extloadi32, i64, anyimm2, ToAext64, L4_loadri_ur>;
}
let AddedComplexity = 40 in {
def: Loadxr_shl_pat<extloadi1, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<zextloadi1, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>;
def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>;
def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>;
def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>;
def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>;
def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
}
let AddedComplexity = 20 in {
def: Loadxr_add_pat<extloadi1, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<zextloadi1, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>;
def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>;
def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>;
def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>;
def: Loadxr_add_pat<load, v8i8, L4_loadrd_rr>;
def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
}
let AddedComplexity = 40 in {
def: Loadxrm_shl_pat<sextloadi1, i32, L1toI32, L4_loadrub_rr>;
def: Loadxrm_shl_pat<extloadi1, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<sextloadi1, i64, L1toI64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<zextloadi1, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
def: Loadxrm_shl_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
def: Loadxrm_shl_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
let AddedComplexity = 30 in {
def: Loadxrm_add_pat<sextloadi1, i32, L1toI32, L4_loadrub_rr>;
def: Loadxrm_add_pat<extloadi1, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<sextloadi1, i64, L1toI64, L4_loadrub_rr>;
def: Loadxrm_add_pat<zextloadi1, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
def: Loadxrm_add_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
def: Loadxrm_add_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
// Absolute address
let AddedComplexity = 60 in {
def: Loada_pat<extloadi1, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>;
def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>;
def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>;
def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>;
def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>;
def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>;
def: Loada_pat<load, f32, anyimm2, PS_loadriabs>;
def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>;
def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>;
def: Loada_pat<atomic_load_32, i32, anyimm2, PS_loadriabs>;
def: Loada_pat<atomic_load_64, i64, anyimm3, PS_loadrdabs>;
}
let AddedComplexity = 30 in {
def: Loadam_pat<load, i1, anyimm0, I32toI1, PS_loadrubabs>;
def: Loadam_pat<sextloadi1, i32, anyimm0, L1toI32, PS_loadrubabs>;
def: Loadam_pat<extloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>;
def: Loadam_pat<sextloadi1, i64, anyimm0, L1toI64, PS_loadrubabs>;
def: Loadam_pat<zextloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>;
def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>;
def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>;
def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
def: Loadam_pat<extloadi16, i64, anyimm1, ToAext64, PS_loadruhabs>;
def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>;
def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>;
def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>;
def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
}
// GP-relative address
let AddedComplexity = 100 in {
def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>;
def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>;
def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>;
def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>;
def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>;
def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>;
def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
def: Loada_pat<load, i32, addrgp, L2_loadrigp>;
def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>;
def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>;
def: Loada_pat<load, i64, addrgp, L2_loadrdgp>;
def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>;
def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>;
def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>;
def: Loada_pat<load, f32, addrgp, L2_loadrigp>;
def: Loada_pat<load, f64, addrgp, L2_loadrdgp>;
def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
}
let AddedComplexity = 70 in {
def: Loadam_pat<sextloadi1, i32, addrgp, L1toI32, L2_loadrubgp>;
def: Loadam_pat<extloadi1, i64, addrgp, ToAext64, L2_loadrubgp>;
def: Loadam_pat<sextloadi1, i64, addrgp, L1toI64, L2_loadrubgp>;
def: Loadam_pat<zextloadi1, i64, addrgp, ToZext64, L2_loadrubgp>;
def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>;
def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>;
def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
def: Loadam_pat<extloadi16, i64, addrgp, ToAext64, L2_loadruhgp>;
def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>;
def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
def: Loadam_pat<extloadi32, i64, addrgp, ToAext64, L2_loadrigp>;
def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>;
def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
}
// Patterns for loads of i1:
def: Pat<(i1 (load AddrFI:$fi)),
(C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))),
(C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
def: Pat<(i1 (load I32:$Rs)),
(C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
// --(13) Store ----------------------------------------------------------
//
class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, InstHexagon MI>
: Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4),
(MI I32:$Rx, imm:$s4, Value:$Rt)>;
def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
// Patterns for generating stores, where the address takes different forms:
// - frameindex,
// - frameindex + offset,
// - base + offset,
// - simple (base address without offset).
// These would usually be used together (via Storexi_pat defined below), but
// in some cases one may want to apply different properties (such as
// AddedComplexity) to the individual patterns.
class Storexi_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
: Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
multiclass Storexi_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
InstHexagon MI> {
def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
(MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
(MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
}
multiclass Storexi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
InstHexagon MI> {
def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
(MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)),
(MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
}
class Storexi_base_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
: Pat<(Store Value:$Rt, I32:$Rs),
(MI IntRegs:$Rs, 0, Value:$Rt)>;
// Patterns for generating stores, where the address takes different forms,
// and where the value being stored is transformed through the value modifier
// ValueMod. The address forms are same as above.
class Storexim_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
InstHexagon MI>
: Pat<(Store Value:$Rs, AddrFI:$fi),
(MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
multiclass Storexim_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
PatFrag ValueMod, InstHexagon MI> {
def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
(MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
(MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
}
multiclass Storexim_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
PatFrag ValueMod, InstHexagon MI> {
def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
(MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)),
(MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
}
class Storexim_base_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
InstHexagon MI>
: Pat<(Store Value:$Rt, I32:$Rs),
(MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
multiclass Storexi_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
InstHexagon MI> {
defm: Storexi_fi_add_pat <Store, Value, ImmPred, MI>;
def: Storexi_fi_pat <Store, Value, MI>;
defm: Storexi_add_pat <Store, Value, ImmPred, MI>;
}
multiclass Storexim_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
PatFrag ValueMod, InstHexagon MI> {
defm: Storexim_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
def: Storexim_fi_pat <Store, Value, ValueMod, MI>;
defm: Storexim_add_pat <Store, Value, ImmPred, ValueMod, MI>;
}
// Reg<<S + Imm
class Storexu_shl_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, InstHexagon MI>
: Pat<(Store Value:$Rt, (add (shl I32:$Ru, u2_0ImmPred:$u2), ImmPred:$A)),
(MI IntRegs:$Ru, imm:$u2, ImmPred:$A, Value:$Rt)>;
// Reg<<S + Reg
class Storexr_shl_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
: Pat<(Store Value:$Ru, (add I32:$Rs, (shl I32:$Rt, u2_0ImmPred:$u2))),
(MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
// Reg + Reg
class Storexr_add_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
: Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)),
(MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>;
class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
: Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
InstHexagon MI>
: Pat<(Store Value:$val, Addr:$addr),
(MI Addr:$addr, (ValueMod Value:$val))>;
// Regular stores in the DAG have two operands: value and address.
// Atomic stores also have two, but they are reversed: address, value.
// To use atomic stores with the patterns, they need to have their operands
// swapped. This relies on the knowledge that the fragment in F.Fragments
// uses the operand names "ptr" and "val".
class AtomSt<PatFrag F>
: PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode,
F.OperandTransform> {
let IsAtomic = F.IsAtomic;
let MemoryVT = F.MemoryVT;
}
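// For example, AtomSt<atomic_store_32> keeps the underlying
// (atomic_store_32 node:$ptr, node:$val) fragment but re-declares the
// operand list as ($val, $ptr), so it can be passed to Storea_pat just
// like a regular store fragment.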
def IMM_BYTE : SDNodeXForm<imm, [{
// -1 can be represented as 255, etc.
// Assigning to a byte restores the desired signed value.
int8_t imm = N->getSExtValue();
return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
}]>;
def IMM_HALF : SDNodeXForm<imm, [{
// -1 can be represented as 65535, etc.
// Assigning to a short restores the desired signed value.
int16_t imm = N->getSExtValue();
return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
}]>;
def IMM_WORD : SDNodeXForm<imm, [{
// -1 can be represented as 4294967295, etc.
// Nothing generates such constants at present, but some optimization
// might convert -1 to a large positive number.
// Assigning to a word restores the desired signed value.
int32_t imm = N->getSExtValue();
return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
}]>;
def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
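// A worked example of these transforms: if an earlier optimization has
// turned the byte constant -1 into 255, IMM_BYTE truncates 255 through
// int8_t back to -1, which is the signed value the store-immediate
// encoding expects.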
// Even though the offset is not extendable in the store-immediate, we
// can still generate the fi# in the base address. If the final offset
// is not valid for the instruction, we will replace it with a scratch
// register.
class SmallStackStore<PatFrag Store>
: PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{
return isSmallStackStore(cast<StoreSDNode>(N));
}]>;
// This is the complement of SmallStackStore.
class LargeStackStore<PatFrag Store>
: PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{
return !isSmallStackStore(cast<StoreSDNode>(N));
}]>;
// Preferred addressing modes for various combinations of stored value
// and address computation.
// For stores where the address and value are both immediates, prefer
// store-immediate. The reason is that the constant-extender optimization
// can replace a store-immediate with a store-register, but nothing exists
// to turn a store-register back into a store-immediate.
//
// C R F F+C R+C R+R R<<S+C R<<S+R
// --+-------+-----+-----+------+-----+-----+--------+--------
// C | imm | imm | imm | imm | imm | rr | ur | rr
// R | abs* | io | io | io | io | rr | ur | rr
//
// (*) Absolute or GP-relative.
//
// Note that any expression can be matched by Reg. In particular, an immediate
// can always be placed in a register, so patterns checking for Imm should
// have a higher priority than the ones involving Reg that could also match.
// For example, *(p+4) could become r1=#4; memw(r0+r1<<#0) instead of the
// preferred memw(r0+#4). Similarly Reg+Imm or Reg+Reg should be tried before
// Reg alone.
//
// The order in which the different combinations are tried:
//
// C F R F+C R+C R+R R<<S+C R<<S+R
// --+-------+-----+-----+------+-----+-----+--------+--------
// C | 1 | 6 | - | 5 | 9 | - | - | -
// R | 2 | 8 | 12 | 7 | 10 | 11 | 3 | 4
// First, match the unusual case of doubleword store into Reg+Imm4, i.e.
// a store where the offset Imm4 is a multiple of 4, but not of 8. This
// implies that Reg is also a proper multiple of 4. To still generate a
// doubleword store, add 4 to Reg, and subtract 4 from the offset.
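// For example (offset chosen purely for illustration): memd(Rs+#12) has an
// offset that is a multiple of 4 but not of 8, so it is rewritten as
// memd((Rs+#4) + #8), using 12 & -8 == 8 as the new offset.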
def s30_2ProperPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v);
}]>;
def RoundTo8 : SDNodeXForm<imm, [{
int32_t Imm = N->getSExtValue();
return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32);
}]>;
let AddedComplexity = 150 in
def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)),
(S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>;
class Storexi_abs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
: Pat<(Store Value:$val, anyimm:$addr),
(MI (ToI32 $addr), 0, Value:$val)>;
class Storexim_abs_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
InstHexagon MI>
: Pat<(Store Value:$val, anyimm:$addr),
(MI (ToI32 $addr), 0, (ValueMod Value:$val))>;
let AddedComplexity = 140 in {
def: Storexim_abs_pat<truncstorei8, anyint, ToImmByte, S4_storeirb_io>;
def: Storexim_abs_pat<truncstorei16, anyint, ToImmHalf, S4_storeirh_io>;
def: Storexim_abs_pat<store, anyint, ToImmWord, S4_storeiri_io>;
def: Storexi_abs_pat<truncstorei8, anyimm, S4_storeirb_io>;
def: Storexi_abs_pat<truncstorei16, anyimm, S4_storeirh_io>;
def: Storexi_abs_pat<store, anyimm, S4_storeiri_io>;
}
// GP-relative address
let AddedComplexity = 120 in {
def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
def: Storea_pat<store, I32, addrgp, S2_storerigp>;
def: Storea_pat<store, V4I8, addrgp, S2_storerigp>;
def: Storea_pat<store, V2I16, addrgp, S2_storerigp>;
def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>;
def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>;
def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>;
def: Storea_pat<store, F32, addrgp, S2_storerigp>;
def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>;
def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>;
def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>;
def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>;
def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>;
def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>;
def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>;
def: Stoream_pat<truncstorei32, I64, addrgp, LoReg, S2_storerigp>;
def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
}
// Absolute address
let AddedComplexity = 110 in {
def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>;
def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>;
def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>;
def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>;
def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>;
def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>;
def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>;
def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>;
def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>;
def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>;
def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>;
def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>;
def: Stoream_pat<truncstorei32, I64, anyimm2, LoReg, PS_storeriabs>;
def: Stoream_pat<store, I1, anyimm0, I1toI32, PS_storerbabs>;
}
// Reg<<S + Imm
let AddedComplexity = 100 in {
def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>;
def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>;
def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>;
def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>;
def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>;
def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)),
(S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>;
}
// Reg<<S + Reg
let AddedComplexity = 90 in {
def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
def: Storexr_shl_pat<store, V4I8, S4_storeri_rr>;
def: Storexr_shl_pat<store, V2I16, S4_storeri_rr>;
def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
def: Storexr_shl_pat<store, V8I8, S4_storerd_rr>;
def: Storexr_shl_pat<store, V4I16, S4_storerd_rr>;
def: Storexr_shl_pat<store, V2I32, S4_storerd_rr>;
def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)),
(S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
}
class SS_<PatFrag F> : SmallStackStore<F>;
class LS_<PatFrag F> : LargeStackStore<F>;
multiclass IMFA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> {
defm: Storexim_fi_add_pat<S, V, O, M, I>;
}
multiclass IFA_<PatFrag S, PatFrag V, PatFrag O, InstHexagon I> {
defm: Storexi_fi_add_pat<S, V, O, I>;
}
// Fi+Imm, store-immediate
let AddedComplexity = 80 in {
defm: IMFA_<SS_<truncstorei8>, anyint, u6_0ImmPred, ToImmByte, S4_storeirb_io>;
defm: IMFA_<SS_<truncstorei16>, anyint, u6_1ImmPred, ToImmHalf, S4_storeirh_io>;
defm: IMFA_<SS_<store>, anyint, u6_2ImmPred, ToImmWord, S4_storeiri_io>;
defm: IFA_<SS_<truncstorei8>, anyimm, u6_0ImmPred, S4_storeirb_io>;
defm: IFA_<SS_<truncstorei16>, anyimm, u6_1ImmPred, S4_storeirh_io>;
defm: IFA_<SS_<store>, anyimm, u6_2ImmPred, S4_storeiri_io>;
// For large-stack stores, generate store-register (prefer explicit Fi
// in the address).
defm: IMFA_<LS_<truncstorei8>, anyimm, u6_0ImmPred, ToI32, S2_storerb_io>;
defm: IMFA_<LS_<truncstorei16>, anyimm, u6_1ImmPred, ToI32, S2_storerh_io>;
defm: IMFA_<LS_<store>, anyimm, u6_2ImmPred, ToI32, S2_storeri_io>;
}
// Fi, store-immediate
let AddedComplexity = 70 in {
def: Storexim_fi_pat<SS_<truncstorei8>, anyint, ToImmByte, S4_storeirb_io>;
def: Storexim_fi_pat<SS_<truncstorei16>, anyint, ToImmHalf, S4_storeirh_io>;
def: Storexim_fi_pat<SS_<store>, anyint, ToImmWord, S4_storeiri_io>;
def: Storexi_fi_pat<SS_<truncstorei8>, anyimm, S4_storeirb_io>;
def: Storexi_fi_pat<SS_<truncstorei16>, anyimm, S4_storeirh_io>;
def: Storexi_fi_pat<SS_<store>, anyimm, S4_storeiri_io>;
// For large-stack stores, generate store-register (prefer explicit Fi
// in the address).
def: Storexim_fi_pat<LS_<truncstorei8>, anyimm, ToI32, S2_storerb_io>;
def: Storexim_fi_pat<LS_<truncstorei16>, anyimm, ToI32, S2_storerh_io>;
def: Storexim_fi_pat<LS_<store>, anyimm, ToI32, S2_storeri_io>;
}
// Fi+Imm, Fi, store-register
let AddedComplexity = 60 in {
defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
defm: Storexi_fi_add_pat<store, V4I8, anyimm, S2_storeri_io>;
defm: Storexi_fi_add_pat<store, V2I16, anyimm, S2_storeri_io>;
defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
defm: Storexi_fi_add_pat<store, V8I8, anyimm, S2_storerd_io>;
defm: Storexi_fi_add_pat<store, V4I16, anyimm, S2_storerd_io>;
defm: Storexi_fi_add_pat<store, V2I32, anyimm, S2_storerd_io>;
defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>;
def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
def: Storexi_fi_pat<store, I32, S2_storeri_io>;
def: Storexi_fi_pat<store, V4I8, S2_storeri_io>;
def: Storexi_fi_pat<store, V2I16, S2_storeri_io>;
def: Storexi_fi_pat<store, I64, S2_storerd_io>;
def: Storexi_fi_pat<store, V8I8, S2_storerd_io>;
def: Storexi_fi_pat<store, V4I16, S2_storerd_io>;
def: Storexi_fi_pat<store, V2I32, S2_storerd_io>;
def: Storexi_fi_pat<store, F32, S2_storeri_io>;
def: Storexi_fi_pat<store, F64, S2_storerd_io>;
def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>;
}
multiclass IMRA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> {
defm: Storexim_add_pat<S, V, O, M, I>;
}
multiclass IRA_<PatFrag S, PatFrag V, PatFrag O, InstHexagon I> {
defm: Storexi_add_pat<S, V, O, I>;
}
// Reg+Imm, store-immediate
let AddedComplexity = 50 in {
defm: IMRA_<truncstorei8, anyint, u6_0ImmPred, ToImmByte, S4_storeirb_io>;
defm: IMRA_<truncstorei16, anyint, u6_1ImmPred, ToImmHalf, S4_storeirh_io>;
defm: IMRA_<store, anyint, u6_2ImmPred, ToImmWord, S4_storeiri_io>;
defm: IRA_<truncstorei8, anyimm, u6_0ImmPred, S4_storeirb_io>;
defm: IRA_<truncstorei16, anyimm, u6_1ImmPred, S4_storeirh_io>;
defm: IRA_<store, anyimm, u6_2ImmPred, S4_storeiri_io>;
}
// Reg+Imm, store-register
let AddedComplexity = 40 in {
defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
defm: Storexi_pat<store, V4I8, anyimm2, S2_storeri_io>;
defm: Storexi_pat<store, V2I16, anyimm2, S2_storeri_io>;
defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
defm: Storexi_pat<store, V8I8, anyimm3, S2_storerd_io>;
defm: Storexi_pat<store, V4I16, anyimm3, S2_storerd_io>;
defm: Storexi_pat<store, V2I32, anyimm3, S2_storerd_io>;
defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>;
defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>;
defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>;
defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>;
defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>;
defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>;
defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>;
defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>;
defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>;
}
// Reg+Reg
let AddedComplexity = 30 in {
def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
def: Storexr_add_pat<store, I32, S4_storeri_rr>;
def: Storexr_add_pat<store, V4I8, S4_storeri_rr>;
def: Storexr_add_pat<store, V2I16, S4_storeri_rr>;
def: Storexr_add_pat<store, I64, S4_storerd_rr>;
def: Storexr_add_pat<store, V8I8, S4_storerd_rr>;
def: Storexr_add_pat<store, V4I16, S4_storerd_rr>;
def: Storexr_add_pat<store, V2I32, S4_storerd_rr>;
def: Storexr_add_pat<store, F32, S4_storeri_rr>;
def: Storexr_add_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)),
(S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>;
}
// Reg, store-immediate
let AddedComplexity = 20 in {
def: Storexim_base_pat<truncstorei8, anyint, ToImmByte, S4_storeirb_io>;
def: Storexim_base_pat<truncstorei16, anyint, ToImmHalf, S4_storeirh_io>;
def: Storexim_base_pat<store, anyint, ToImmWord, S4_storeiri_io>;
def: Storexi_base_pat<truncstorei8, anyimm, S4_storeirb_io>;
def: Storexi_base_pat<truncstorei16, anyimm, S4_storeirh_io>;
def: Storexi_base_pat<store, anyimm, S4_storeiri_io>;
}
// Reg, store-register
let AddedComplexity = 10 in {
def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
def: Storexi_base_pat<store, I32, S2_storeri_io>;
def: Storexi_base_pat<store, V4I8, S2_storeri_io>;
def: Storexi_base_pat<store, V2I16, S2_storeri_io>;
def: Storexi_base_pat<store, I64, S2_storerd_io>;
def: Storexi_base_pat<store, V8I8, S2_storerd_io>;
def: Storexi_base_pat<store, V4I16, S2_storerd_io>;
def: Storexi_base_pat<store, V2I32, S2_storerd_io>;
def: Storexi_base_pat<store, F32, S2_storeri_io>;
def: Storexi_base_pat<store, F64, S2_storerd_io>;
def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>;
def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>;
def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>;
def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>;
def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>;
def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>;
}
// --(14) Memop ----------------------------------------------------------
//
def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
int8_t V = N->getSExtValue();
return -32 < V && V <= -1;
}]>;
def m5_0Imm16Pred : PatLeaf<(i32 imm), [{
int16_t V = N->getSExtValue();
return -32 < V && V <= -1;
}]>;
def m5_0ImmPred : PatLeaf<(i32 imm), [{
int64_t V = N->getSExtValue();
return -31 <= V && V <= -1;
}]>;
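// All three predicates accept the same range [-31, -1]; the bounds are
// merely written differently (-32 < V is equivalent to -31 <= V).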
def IsNPow2_8 : PatLeaf<(i32 imm), [{
uint8_t NV = ~N->getZExtValue();
return isPowerOf2_32(NV);
}]>;
def IsNPow2_16 : PatLeaf<(i32 imm), [{
uint16_t NV = ~N->getZExtValue();
return isPowerOf2_32(NV);
}]>;
def Log2_8 : SDNodeXForm<imm, [{
uint8_t V = N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
}]>;
def Log2_16 : SDNodeXForm<imm, [{
uint16_t V = N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
}]>;
def LogN2_8 : SDNodeXForm<imm, [{
uint8_t NV = ~N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
}]>;
def LogN2_16 : SDNodeXForm<imm, [{
uint16_t NV = ~N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
}]>;
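// These transforms feed the clrbit/setbit memop patterns below. As a worked
// example, "x & 0xFE" on a byte has ~0xFE == 0x01, a power of 2, so
// IsNPow2_8 accepts it and LogN2_8 yields bit index 0, i.e. clrbit(#0);
// likewise "x | 0x10" becomes setbit(#4) via Log2.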
def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>;
multiclass Memopxr_base_pat<PatFrag Load, PatFrag Store, SDNode Oper,
InstHexagon MI> {
// Addr: i32
def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs),
(MI I32:$Rs, 0, I32:$A)>;
// Addr: fi
def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs),
(MI AddrFI:$Rs, 0, I32:$A)>;
}
multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
SDNode Oper, InstHexagon MI> {
// Addr: i32
def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A),
(add I32:$Rs, ImmPred:$Off)),
(MI I32:$Rs, imm:$Off, I32:$A)>;
def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), I32:$A),
(IsOrAdd I32:$Rs, ImmPred:$Off)),
(MI I32:$Rs, imm:$Off, I32:$A)>;
// Addr: fi
def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A),
(add AddrFI:$Rs, ImmPred:$Off)),
(MI AddrFI:$Rs, imm:$Off, I32:$A)>;
def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), I32:$A),
(IsOrAdd AddrFI:$Rs, ImmPred:$Off)),
(MI AddrFI:$Rs, imm:$Off, I32:$A)>;
}
multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
SDNode Oper, InstHexagon MI> {
let Predicates = [UseMEMOPS] in {
defm: Memopxr_base_pat <Load, Store, Oper, MI>;
defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
}
}
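// As a hypothetical source-level example, these patterns let a read-modify-
// write such as "*p += r" select into a single memop instruction (e.g.
// memw(Rs+#0) += Rt) instead of a separate load, add, and store.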
let AddedComplexity = 200 in {
// add reg
defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
/*anyext*/ L4_add_memopb_io>;
defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
/*sext*/ L4_add_memopb_io>;
defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
/*zext*/ L4_add_memopb_io>;
defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
/*anyext*/ L4_add_memoph_io>;
defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
/*sext*/ L4_add_memoph_io>;
defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
/*zext*/ L4_add_memoph_io>;
defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;
// sub reg
defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
/*anyext*/ L4_sub_memopb_io>;
defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
/*sext*/ L4_sub_memopb_io>;
defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
/*zext*/ L4_sub_memopb_io>;
defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
/*anyext*/ L4_sub_memoph_io>;
defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
/*sext*/ L4_sub_memoph_io>;
defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
/*zext*/ L4_sub_memoph_io>;
defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;
// and reg
defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
/*anyext*/ L4_and_memopb_io>;
defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
/*sext*/ L4_and_memopb_io>;
defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
/*zext*/ L4_and_memopb_io>;
defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
/*anyext*/ L4_and_memoph_io>;
defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
/*sext*/ L4_and_memoph_io>;
defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
/*zext*/ L4_and_memoph_io>;
defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;
// or reg
defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
/*anyext*/ L4_or_memopb_io>;
defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
/*sext*/ L4_or_memopb_io>;
defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
/*zext*/ L4_or_memopb_io>;
defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
/*anyext*/ L4_or_memoph_io>;
defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
/*sext*/ L4_or_memoph_io>;
defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
/*zext*/ L4_or_memoph_io>;
defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
}
multiclass Memopxi_base_pat<PatFrag Load, PatFrag Store, SDNode Oper,
PatFrag Arg, SDNodeXForm ArgMod, InstHexagon MI> {
// Addr: i32
def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs),
(MI I32:$Rs, 0, (ArgMod Arg:$A))>;
// Addr: fi
def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs),
(MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>;
}
multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
InstHexagon MI> {
// Addr: i32
def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A),
(add I32:$Rs, ImmPred:$Off)),
(MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), Arg:$A),
(IsOrAdd I32:$Rs, ImmPred:$Off)),
(MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
// Addr: fi
def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
(add AddrFI:$Rs, ImmPred:$Off)),
(MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
(IsOrAdd AddrFI:$Rs, ImmPred:$Off)),
(MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
}
multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
InstHexagon MI> {
let Predicates = [UseMEMOPS] in {
defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>;
defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
}
}
let AddedComplexity = 220 in {
// add imm
defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
/*anyext*/ IdImm, L4_iadd_memopb_io>;
defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
/*sext*/ IdImm, L4_iadd_memopb_io>;
defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
/*zext*/ IdImm, L4_iadd_memopb_io>;
defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
/*anyext*/ IdImm, L4_iadd_memoph_io>;
defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
/*sext*/ IdImm, L4_iadd_memoph_io>;
defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
/*zext*/ IdImm, L4_iadd_memoph_io>;
defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm,
L4_iadd_memopw_io>;
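// Subtracting a negative immediate is the same as adding its magnitude, so
// the following map "sub" with an m5 immediate onto iadd of the negated
// value (via NegImm).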
defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
/*anyext*/ NegImm8, L4_iadd_memopb_io>;
defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
/*sext*/ NegImm8, L4_iadd_memopb_io>;
defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
/*zext*/ NegImm8, L4_iadd_memopb_io>;
defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
/*anyext*/ NegImm16, L4_iadd_memoph_io>;
defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
/*sext*/ NegImm16, L4_iadd_memoph_io>;
defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
/*zext*/ NegImm16, L4_iadd_memoph_io>;
defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32,
L4_iadd_memopw_io>;
// sub imm
defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
/*anyext*/ IdImm, L4_isub_memopb_io>;
defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
/*sext*/ IdImm, L4_isub_memopb_io>;
defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
/*zext*/ IdImm, L4_isub_memopb_io>;
defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
/*anyext*/ IdImm, L4_isub_memoph_io>;
defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
/*sext*/ IdImm, L4_isub_memoph_io>;
defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
/*zext*/ IdImm, L4_isub_memoph_io>;
defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm,
L4_isub_memopw_io>;
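// Conversely, adding a negative immediate maps onto isub of the negated
// value.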
defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
/*anyext*/ NegImm8, L4_isub_memopb_io>;
defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
/*sext*/ NegImm8, L4_isub_memopb_io>;
defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
/*zext*/ NegImm8, L4_isub_memopb_io>;
defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
/*anyext*/ NegImm16, L4_isub_memoph_io>;
defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
/*sext*/ NegImm16, L4_isub_memoph_io>;
defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
/*zext*/ NegImm16, L4_isub_memoph_io>;
defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32,
L4_isub_memopw_io>;
// clrbit imm
defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
/*anyext*/ LogN2_8, L4_iand_memopb_io>;
defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
/*sext*/ LogN2_8, L4_iand_memopb_io>;
defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
/*zext*/ LogN2_8, L4_iand_memopb_io>;
defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
/*anyext*/ LogN2_16, L4_iand_memoph_io>;
defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
/*sext*/ LogN2_16, L4_iand_memoph_io>;
defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
/*zext*/ LogN2_16, L4_iand_memoph_io>;
defm: Memopxi_pat<load, store, u6_2ImmPred, and, IsNPow2_32,
LogN2_32, L4_iand_memopw_io>;
// setbit imm
defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
/*anyext*/ Log2_8, L4_ior_memopb_io>;
defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
/*sext*/ Log2_8, L4_ior_memopb_io>;
defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
/*zext*/ Log2_8, L4_ior_memopb_io>;
defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
/*anyext*/ Log2_16, L4_ior_memoph_io>;
defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
/*sext*/ Log2_16, L4_ior_memoph_io>;
defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
/*zext*/ Log2_16, L4_ior_memoph_io>;
defm: Memopxi_pat<load, store, u6_2ImmPred, or, IsPow2_32,
Log2_32, L4_ior_memopw_io>;
}
// --(15) Call -----------------------------------------------------------
//
// Pseudo instructions.
def SDT_SPCallSeqStart
: SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def SDT_SPCallSeqEnd
: SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def callseq_start: SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
def callseq_end: SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SDT_SPCall: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def HexagonTCRet: SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def callv3: SDNode<"HexagonISD::CALL", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
def callv3nr: SDNode<"HexagonISD::CALLnr", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
def: Pat<(callseq_start timm:$amt, timm:$amt2),
(ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>;
def: Pat<(callseq_end timm:$amt1, timm:$amt2),
(ADJCALLSTACKUP imm:$amt1, imm:$amt2)>;
def: Pat<(HexagonTCRet tglobaladdr:$dst), (PS_tailcall_i tglobaladdr:$dst)>;
def: Pat<(HexagonTCRet texternalsym:$dst), (PS_tailcall_i texternalsym:$dst)>;
def: Pat<(HexagonTCRet I32:$dst), (PS_tailcall_r I32:$dst)>;
def: Pat<(callv3 I32:$dst), (J2_callr I32:$dst)>;
def: Pat<(callv3 tglobaladdr:$dst), (J2_call tglobaladdr:$dst)>;
def: Pat<(callv3 texternalsym:$dst), (J2_call texternalsym:$dst)>;
def: Pat<(callv3 tglobaltlsaddr:$dst), (J2_call tglobaltlsaddr:$dst)>;
def: Pat<(callv3nr I32:$dst), (PS_callr_nr I32:$dst)>;
def: Pat<(callv3nr tglobaladdr:$dst), (PS_call_nr tglobaladdr:$dst)>;
def: Pat<(callv3nr texternalsym:$dst), (PS_call_nr texternalsym:$dst)>;
def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
def: Pat<(retflag), (PS_jmpret (i32 R31))>;
def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>;
// --(16) Branch ---------------------------------------------------------
//
def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>;
def: Pat<(brind I32:$dst), (J2_jumpr I32:$dst)>;
def: Pat<(brcond I1:$Pu, bb:$dst),
(J2_jumpt I1:$Pu, bb:$dst)>;
def: Pat<(brcond (not I1:$Pu), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
(J2_jumpt I1:$Pu, bb:$dst)>;
// --(17) Misc -----------------------------------------------------------
// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)'
// for C code of the form r = (c>='0' && c<='9') ? 1 : 0.
// The isdigit transformation relies on two 'clever' aspects:
// 1) The data type is unsigned, which allows us to eliminate a zero test after
// biasing the expression by 48. We depend on the representation and
// semantics of the unsigned types.
// 2) The front end has converted <= 9 into < 10 on entry to LLVM.
//
// For the C code:
// retval = (c >= '0' && c <= '9') ? 1 : 0;
// The code is transformed upstream of LLVM into
// retval = (c-48) < 10 ? 1 : 0;
def u7_0PosImmPred : ImmLeaf<i32, [{
// True if the immediate fits in a 7-bit unsigned field and is positive.
return Imm > 0 && isUInt<7>(Imm);
}]>;
let AddedComplexity = 139 in
def: Pat<(i32 (zext (i1 (setult (and I32:$Rs, 255), u7_0PosImmPred:$u7)))),
(C2_muxii (A4_cmpbgtui IntRegs:$Rs, (UDEC1 imm:$u7)), 0, 1)>;
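// For instance, with $u7 == 10 this produces a cmpb.gtu(Rs, #9) whose mux
// yields 0 when the byte exceeds 9 and 1 otherwise, i.e. exactly
// (Rs & 255) < 10.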
let AddedComplexity = 100 in
def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
(i32 (extloadi8 (add I32:$b, 3))),
24, 8),
(i32 16)),
(shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
(zextloadi8 I32:$b)),
(A2_swiz (L2_loadri_io I32:$b, 0))>;
// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
// We don't really want either one here.
def SDTHexagonDCFETCH: SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
def HexagonDCFETCH: SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
[SDNPHasChain]>;
def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3),
(Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)),
(Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
def SDTHexagonALLOCA
: SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def HexagonALLOCA
: SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, [SDNPHasChain]>;
def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
(PS_alloca IntRegs:$Rs, imm:$A)>;
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
def: Pat<(HexagonBARRIER), (Y2_barrier)>;
def: Pat<(trap), (PS_crash)>;
// Read cycle counter.
def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
[SDNPHasChain]>;
def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
// The declared return value of the store-locked intrinsics is i32, but
// the instructions actually define i1. To avoid register copies from
// IntRegs to PredRegs and back, fold the entire pattern checking the
// result against true/false.
let AddedComplexity = 100 in {
def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
(S2_storew_locked I32:$Rs, I32:$Rt)>;
def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
(C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>;
def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
(S4_stored_locked I32:$Rs, I64:$Rt)>;
def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
(C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
}
def: Pat<(int_hexagon_instrprof_custom (HexagonAtPcrel tglobaladdr:$addr), u32_0ImmPred:$I),
(PS_call_instrprof_custom tglobaladdr:$addr, imm:$I)>;
def: Pat<(int_hexagon_instrprof_custom (HexagonCONST32 tglobaladdr:$addr), u32_0ImmPred:$I),
(PS_call_instrprof_custom tglobaladdr:$addr, imm:$I)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index cf17c51b04fc..e43b33eed470 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1,57839 +1,57854 @@
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "X86ISelLowering.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "X86.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cctype>
#include <numeric>
using namespace llvm;
#define DEBUG_TYPE "x86-isel"
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"x86-experimental-pref-innermost-loop-alignment", cl::init(4),
cl::desc(
"Sets the preferable loop alignment for experiments (as log2 bytes) "
"for innermost loops only. If specified, this option overrides "
"alignment set by x86-experimental-pref-loop-alignment."),
cl::Hidden);
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
"SHIFT, LEA, etc."),
cl::Hidden);
static cl::opt<bool> ExperimentalUnorderedISEL(
"x86-experimental-unordered-atomic-isel", cl::init(false),
cl::desc("Use LoadSDNode and StoreSDNode instead of "
"AtomicSDNode for unordered atomic loads and "
"stores respectively."),
cl::Hidden);
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
const char *Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Set up the TargetLowering object.
// X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
if (Subtarget.isAtom())
setSchedulingPreference(Sched::ILP);
else if (Subtarget.is64Bit())
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides and use cheaper ones.
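// For example, addBypassSlowDiv(32, 8) inserts a run-time check so that a
// 32-bit divide whose operands both fit into 8 bits is carried out by the
// cheaper 8-bit divide instead.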
if (TM.getOptLevel() >= CodeGenOpt::Default) {
if (Subtarget.hasSlowDivide32())
addBypassSlowDiv(32, 8);
if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
addBypassSlowDiv(64, 32);
}
// Setup Windows compiler runtime calls.
if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
{ RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
{ RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
{ RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
{ RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
if (Subtarget.getTargetTriple().isOSMSVCRT()) {
// MSVCRT doesn't have powi; fall back to pow
setLibcallName(RTLIB::POWI_F32, nullptr);
setLibcallName(RTLIB::POWI_F64, nullptr);
}
// If we don't have cmpxchg8b (meaning this is a 386/486), limit the atomic
// size to 32 bits so that AtomicExpandPass will expand it and we don't need
// cmpxchg8b.
// FIXME: Should we be limiting the atomic size on other configs? Default is
// 1024.
if (!Subtarget.canUseCMPXCHG8B())
setMaxAtomicSizeInBitsSupported(32);
setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
setMaxLargeFPConvertBitWidthSupported(128);
// Set up the register classes.
addRegisterClass(MVT::i8, &X86::GR8RegClass);
addRegisterClass(MVT::i16, &X86::GR16RegClass);
addRegisterClass(MVT::i32, &X86::GR32RegClass);
if (Subtarget.is64Bit())
addRegisterClass(MVT::i64, &X86::GR64RegClass);
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SETOEQ and SETUNE require checking two conditions.
for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
setCondCodeAction(ISD::SETOEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
// Integer absolute.
if (Subtarget.canUseCMOV()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
setOperationAction(ISD::ABS , MVT::i32 , Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
// Signed saturation subtraction.
setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
// For slow shld targets we only lower for code size.
LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
setOperationAction(ShiftOp , MVT::i8 , Custom);
setOperationAction(ShiftOp , MVT::i16 , Custom);
setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
if (Subtarget.is64Bit())
setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
}
if (!Subtarget.useSoftFloat()) {
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
// Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
// SSE has no i16 to fp conversion, only i32. We promote in the handler
// to allow f80 to use i16 and f64 to use i16 with sse1 only
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
// f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
// Promote i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::LRINT, MVT::f32, Custom);
setOperationAction(ISD::LRINT, MVT::f64, Custom);
setOperationAction(ISD::LLRINT, MVT::f32, Custom);
setOperationAction(ISD::LLRINT, MVT::f64, Custom);
if (!Subtarget.is64Bit()) {
setOperationAction(ISD::LRINT, MVT::i64, Custom);
setOperationAction(ISD::LLRINT, MVT::i64, Custom);
}
}
if (Subtarget.hasSSE2()) {
// Custom lowering for saturating float to int conversions.
// We handle promotion to larger result types manually.
for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
}
}
// Handle address space casts between mixed sized pointers.
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
// TODO: when we have SSE, these could be more efficient by using movd/movq.
if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
} else if (!Subtarget.is64Bit())
setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
// the two-result form to trivial CSE, which is able to combine x/y and x%y
// into a single instruction.
//
// Scalar integer multiply-high is also lowered to use two-result
// operations, to match the available instructions. However, plain multiply
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
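// For example, with this lowering "q = a / b; r = a % b;" can both be served
// by a single DIV/IDIV, which already produces the quotient in *AX and the
// remainder in *DX.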
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
}
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (Subtarget.is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FREM , MVT::f128 , Expand);
if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
}
// Promote the i8 variants and force them on up to i32 which has a shorter
// encoding.
setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
// Promote i16 as well: tzcntw has a false dependency on Intel CPUs, and for
// BSF we emit a REP prefix to encode it as TZCNT on modern CPUs, so it makes
// sense to promote that too.
setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
if (!Subtarget.hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
}
if (Subtarget.hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
} else {
for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::CTLZ , VT, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
}
}
for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
ISD::STRICT_FP_TO_FP16}) {
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
setOperationAction(
Op, MVT::f32,
(!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
// There's never any support for operations beyond MVT::f32.
setOperationAction(Op, MVT::f64, Expand);
setOperationAction(Op, MVT::f80, Expand);
setOperationAction(Op, MVT::f128, Expand);
}
for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
setTruncStoreAction(VT, MVT::f16, Expand);
setTruncStoreAction(VT, MVT::bf16, Expand);
setOperationAction(ISD::BF16_TO_FP, VT, Expand);
setOperationAction(ISD::FP_TO_BF16, VT, Custom);
}
setOperationAction(ISD::PARITY, MVT::i8, Custom);
setOperationAction(ISD::PARITY, MVT::i16, Custom);
setOperationAction(ISD::PARITY, MVT::i32, Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::PARITY, MVT::i64, Custom);
if (Subtarget.hasPOPCNT()) {
setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
// popcntw is longer to encode than popcntl and also has a false dependency
// on the dest that popcntl hasn't had since Cannon Lake.
setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
else
setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
if (!Subtarget.hasMOVBE())
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// X86 wants to expand cmov itself.
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
// Custom action for SELECT MMX and expand action for SELECT_CC MMX
setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
// LLVM/Clang supports zero-cost DWARF and SEH exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
// Darwin ABI issue.
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::ConstantPool , VT, Custom);
setOperationAction(ISD::JumpTable , VT, Custom);
setOperationAction(ISD::GlobalAddress , VT, Custom);
setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
setOperationAction(ISD::ExternalSymbol , VT, Custom);
setOperationAction(ISD::BlockAddress , VT, Custom);
}
// 64-bit shl, sra, srl (iff 32-bit x86)
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SHL_PARTS, VT, Custom);
setOperationAction(ISD::SRA_PARTS, VT, Custom);
setOperationAction(ISD::SRL_PARTS, VT, Custom);
}
if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
- setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+ setOperationAction(ISD::PREFETCH , MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// Expand certain atomics
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (!Subtarget.is64Bit())
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
if (Subtarget.canUseCMPXCHG16B())
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
// FIXME - use subtarget debug flags
if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
!Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
if (Subtarget.isTargetPS())
setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
else
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
bool Is64Bit = Subtarget.is64Bit();
setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
// GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
setOperationAction(ISD::FABS, VT, Action);
setOperationAction(ISD::FNEG, VT, Action);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FREM, VT, Action);
setOperationAction(ISD::FMA, VT, Action);
setOperationAction(ISD::FMINNUM, VT, Action);
setOperationAction(ISD::FMAXNUM, VT, Action);
setOperationAction(ISD::FMINIMUM, VT, Action);
setOperationAction(ISD::FMAXIMUM, VT, Action);
setOperationAction(ISD::FSIN, VT, Action);
setOperationAction(ISD::FCOS, VT, Action);
setOperationAction(ISD::FSINCOS, VT, Action);
setOperationAction(ISD::FSQRT, VT, Action);
setOperationAction(ISD::FPOW, VT, Action);
setOperationAction(ISD::FLOG, VT, Action);
setOperationAction(ISD::FLOG2, VT, Action);
setOperationAction(ISD::FLOG10, VT, Action);
setOperationAction(ISD::FEXP, VT, Action);
setOperationAction(ISD::FEXP2, VT, Action);
setOperationAction(ISD::FCEIL, VT, Action);
setOperationAction(ISD::FFLOOR, VT, Action);
setOperationAction(ISD::FNEARBYINT, VT, Action);
setOperationAction(ISD::FRINT, VT, Action);
setOperationAction(ISD::BR_CC, VT, Action);
setOperationAction(ISD::SETCC, VT, Action);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Action);
setOperationAction(ISD::FROUND, VT, Action);
setOperationAction(ISD::FROUNDEVEN, VT, Action);
setOperationAction(ISD::FTRUNC, VT, Action);
};
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
// f16, f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
: &X86::FR16RegClass);
addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
: &X86::FR32RegClass);
addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
: &X86::FR64RegClass);
// Disable f32->f64 extload as we can only generate this in one instruction
// under optsize. So it's easier to pattern match (fpext (load)) for that
// case instead of needing to emit 2 instructions for extload in the
// non-optsize case.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG, VT, Custom);
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
// These might be better off as horizontal vector ops.
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
// Half type will be promoted by default.
setF16Action(MVT::f16, Promote);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
setOperationAction(ISD::FDIV, MVT::f16, Promote);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
// Lower this to MOVMSK plus an AND.
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
} else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
(UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
if (UseX87)
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
if (UseX87)
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
if (UseX87)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
if (UseX87) {
// Always expand sin/cos functions even though x87 has an instruction.
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (UseX87) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
addRegisterClass(MVT::f32, &X86::RFP32RegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
setOperationAction(ISD::UNDEF, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
// Always expand sin/cos functions even though x87 has an instruction.
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
}
// Expand FP32 immediates into loads from the stack, save special cases.
if (isTypeLegal(MVT::f32)) {
if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
addLegalFPImmediate(APFloat(+0.0f)); // FLD0
addLegalFPImmediate(APFloat(+1.0f)); // FLD1
addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
} else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
}
// Expand FP64 immediates into loads from the stack, save special cases.
if (isTypeLegal(MVT::f64)) {
if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
} else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// Support fp16 0 immediate.
if (isTypeLegal(MVT::f16))
addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
// Handle constrained floating-point operations of scalar.
setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
// f80 always uses X87.
if (UseX87) {
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
&ignored);
addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
// Always expand sin/cos functions even though x87 has an instruction.
setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
setOperationAction(ISD::LROUND, MVT::f80, Expand);
setOperationAction(ISD::LLROUND, MVT::f80, Expand);
setOperationAction(ISD::LRINT, MVT::f80, Custom);
setOperationAction(ISD::LLRINT, MVT::f80, Custom);
// Handle constrained floating-point operations of scalar.
setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
} else {
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
}
// FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
// as Custom.
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
}
// f128 uses xmm registers, but most operations require libcalls.
if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
setOperationAction(ISD::FABS, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
setOperationAction(ISD::FSIN, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
setOperationAction(ISD::FCOS, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
// No STRICT_FSINCOS
setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
// We need to custom handle any FP_ROUND with an f128 input, but
// LegalizeDAG uses the result type to know when to run a custom handler.
// So we have to list all legal floating point result types here.
if (isTypeLegal(MVT::f32)) {
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
}
if (isTypeLegal(MVT::f64)) {
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
}
if (isTypeLegal(MVT::f80)) {
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
}
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f80, Expand);
}
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
setOperationAction(ISD::FPOW , MVT::f128 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// Some FP actions are always expanded for vector types.
for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
MVT::v4f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(InnerVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
// N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
// types; we have to deal with them whether we ask for Expansion or not.
// Setting Expand causes its own optimisation problems though, so leave
// them legal.
if (VT.getVectorElementType() == MVT::i1)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
// split/scalarized right now.
if (VT.getVectorElementType() == MVT::f16 ||
VT.getVectorElementType() == MVT::bf16)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
}
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SREM, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UREM, VT, Custom);
}
setOperationAction(ISD::MUL, MVT::v2i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i8, Custom);
setOperationAction(ISD::MUL, MVT::v8i8, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
setCondCodeAction(ISD::SETLE, VT, Custom);
}
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
if (VT == MVT::v2i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setF16Action(MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
// Custom legalize these to avoid over promotion or custom promotion.
for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
}
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
// We want to legalize this to an f64 load rather than an i64 load on
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
// store.
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i16, Custom);
setOperationAction(ISD::STORE, MVT::v8i8, Custom);
// Add 32-bit vector stores to help vectorization opportunities.
setOperationAction(ISD::STORE, MVT::v2i16, Custom);
setOperationAction(ISD::STORE, MVT::v4i8, Custom);
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
if (VT == MVT::v2i64) continue;
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
// These might be better off as horizontal vector ops.
setOperationAction(ISD::ADD, MVT::i16, Custom);
setOperationAction(ISD::ADD, MVT::i32, Custom);
setOperationAction(ISD::SUB, MVT::i16, Custom);
setOperationAction(ISD::SUB, MVT::i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
setOperationAction(ISD::FRINT, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
setOperationAction(ISD::FROUND, RoundedTy, Custom);
}
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
// We directly match byte blends in the backend as they match the VSELECT
// condition form.
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
// SSE41 brings specific instructions for doing vector sign extend even in
// cases where we don't have SRA.
for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
}
if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
// We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
// do the pre and post work in the vector domain.
setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
// We need to mark SINT_TO_FP as Custom even though we want to expand it
// so that DAG combine doesn't try to turn it into uint_to_fp.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
// XOP can efficiently perform BITREVERSE with VPPERM.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
setOperationAction(ISD::BITREVERSE, VT, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::BITREVERSE, VT, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
bool HasInt256 = Subtarget.hasInt256();
addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
// In the customized shift lowering, the legal v8i32/v4i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
if (VT == MVT::v4i64) continue;
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
// These types need custom splitting if their input is a 128-bit vector.
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
setCondCodeAction(ISD::SETLE, VT, Custom);
}
if (Subtarget.hasAnyFMA()) {
for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
}
}
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
}
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
setOperationAction(ISD::ABS, MVT::v4i64, Custom);
setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
}
for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
}
if (HasInt256) {
// The custom lowering of UINT_TO_FP for v8i32 becomes interesting
// when we have a 256-bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
}
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
}
// Custom lower several nodes for 256-bit types.
for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
}
setF16Action(MVT::v16f16, Expand);
setOperationAction(ISD::FADD, MVT::v16f16, Expand);
setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
if (HasInt256) {
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::MGATHER, VT, Custom);
}
}
if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
Subtarget.hasF16C()) {
for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
}
for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) {
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
}
for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
}
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
}
// This block controls legalization of the mask vector sizes that are
// available with AVX512. 512-bit vectors are in a separate block controlled
// by useAVX512Regs.
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
setOperationAction(ISD::STORE, MVT::v1i1, Custom);
setOperationAction(ISD::STORE, MVT::v2i1, Custom);
setOperationAction(ISD::STORE, MVT::v4i1, Custom);
setOperationAction(ISD::STORE, MVT::v8i1, Custom);
}
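// (With only KMOVW available, the custom lowering presumably routes these
// narrow masks through a GPR: e.g. a v8i1 store is moved out with KMOVW
// and then stored as a plain byte.)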
// Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
setOperationAction(ISD::VSELECT, VT, Expand);
for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
// This block controls legalization for 512-bit operations with 32/64-bit
// elements. 512-bit operations can be disabled based on the prefer-vector-width
// and required-vector-width function attributes.
if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
bool HasBWI = Subtarget.hasBWI();
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
if (HasBWI)
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
}
for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
if (HasBWI)
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
// With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
// to 512-bit rather than use the AVX2 instructions so that we can use
// k-masks.
if (!Subtarget.hasVLX()) {
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
}
}
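// The expectation is that the custom handler widens, say, a v8f32 masked
// load to v16f32 with its mask zero-extended to v16i1, so the k-register
// form is used instead of falling back to AVX2's VMASKMOV.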
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
if (HasBWI) {
// Extends from v64i1 masks to 512-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
}
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
setOperationAction(ISD::FROUND, VT, Custom);
}
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
}
setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v64i8, Custom);
setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
// These condition codes aren't legal in SSE/AVX, and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
setCondCodeAction(ISD::SETLE, VT, Custom);
}
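// Making SETLT/SETLE Custom lets lowering canonicalize them by swapping
// operands, e.g. (setlt a, b) becomes (setgt b, a), so only the
// SETGT-style patterns need to exist in isel.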
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
}
setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
if (Subtarget.hasDQI()) {
for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
setOperationAction(Opc, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
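// AVX512DQ has a native 64-bit element multiply (VPMULLQ), which is why
// MUL v8i64 flips from Custom to Legal here.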
if (Subtarget.hasCDI()) {
// Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version.
for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
setOperationAction(ISD::CTLZ, VT, Legal);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
for (auto VT : { MVT::v16i32, MVT::v8i64 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Legal under AVX1.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v16f16, MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
}
setF16Action(MVT::v32f16, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
}
for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
if (HasBWI) {
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
}
} else {
setOperationAction(ISD::STORE, MVT::v32i16, Custom);
setOperationAction(ISD::STORE, MVT::v64i8, Custom);
}
if (Subtarget.hasVBMI2()) {
for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
}
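// VBMI2 provides double-shift instructions (VPSHLD/VPSHRD and their
// variable forms), which the custom FSHL/FSHR lowering above can target,
// conceptually: fshl(a, b, c) -> vpshldv(a, b, c) per element.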
}// useAVX512Regs
// This block controls legalization for operations that don't have
// pre-AVX512 equivalents. Without VLX we use 512-bit operations for
// narrower widths.
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
if (Subtarget.hasDQI()) {
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
// v2f32 UINT_TO_FP is already custom under SSE2.
assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
"Unexpected operation action!");
// v2i64 FP_TO_S/UINT(v2f32) custom conversion.
setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
}
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::MSCATTER, VT, Custom);
if (Subtarget.hasDQI()) {
for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) {
setOperationAction(Opc, MVT::v2i64, Custom);
setOperationAction(Opc, MVT::v4i64, Custom);
}
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Legal);
}
if (Subtarget.hasCDI()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::CTLZ, VT, Legal);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
// This block controls legalization of v32i1/v64i1, which are available with
// AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
// useBWIRegs.
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
for (auto VT : { MVT::v16i1, MVT::v32i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v32i1 masks to 256-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
}
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
if (Subtarget.hasBITALG()) {
for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
auto setGroup = [&] (MVT VT) {
setOperationAction(ISD::FADD, VT, Legal);
setOperationAction(ISD::STRICT_FADD, VT, Legal);
setOperationAction(ISD::FSUB, VT, Legal);
setOperationAction(ISD::STRICT_FSUB, VT, Legal);
setOperationAction(ISD::FMUL, VT, Legal);
setOperationAction(ISD::STRICT_FMUL, VT, Legal);
setOperationAction(ISD::FDIV, VT, Legal);
setOperationAction(ISD::STRICT_FDIV, VT, Legal);
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::LOAD, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
};
// AVX512_FP16 scalar operations
setGroup(MVT::f16);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
setOperationAction(ISD::FROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
if (Subtarget.useAVX512Regs()) {
setGroup(MVT::v32f16);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
MVT::v32i16);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
MVT::v32i16);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
MVT::v32i16);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
MVT::v32i16);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
}
if (Subtarget.hasVLX()) {
setGroup(MVT::v8f16);
setGroup(MVT::v16f16);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
// INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
// Need to custom widen these to prevent scalarization.
setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
setOperationAction(ISD::STORE, MVT::v4f16, Custom);
}
}
if (!Subtarget.useSoftFloat() &&
(Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
// We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
// provide the method to promote BUILD_VECTOR. Set the operation action
// Custom to do the customization later.
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
setF16Action(VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
setOperationAction(ISD::FSUB, VT, Expand);
setOperationAction(ISD::FMUL, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
}
if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
setF16Action(MVT::v32bf16, Expand);
setOperationAction(ISD::FADD, MVT::v32bf16, Expand);
setOperationAction(ISD::FSUB, MVT::v32bf16, Expand);
setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
if (Subtarget.hasBWI()) {
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
if (Subtarget.hasFP16()) {
// vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
// vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
// vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
// vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
}
setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
}
if (Subtarget.hasAMXTILE()) {
addRegisterClass(MVT::x86amx, &X86::TILERegClass);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
if (!Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
}
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
//
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
// Add/Sub/Mul with overflow operations are custom lowered.
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SMULO, VT, Custom);
setOperationAction(ISD::UMULO, VT, Custom);
// Support carry in as value rather than glue.
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
setOperationAction(ISD::SETCCCARRY, VT, Custom);
setOperationAction(ISD::SADDO_CARRY, VT, Custom);
setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
// The MULO libcall is not part of libgcc, only compiler-rt.
setLibcallName(RTLIB::MULO_I64, nullptr);
}
// The MULO libcall is not part of libgcc, only compiler-rt.
setLibcallName(RTLIB::MULO_I128, nullptr);
// Combine sin / cos into _sincos_stret if it is available.
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
if (Subtarget.isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
setOperationAction(ISD::UREM, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
}
// On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
// is. We should promote the value to 64 bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
// function casting to f64 and calling `fmod`.
if (Subtarget.is32Bit() &&
(Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
for (ISD::NodeType Op :
{ISD::FCEIL, ISD::STRICT_FCEIL,
ISD::FCOS, ISD::STRICT_FCOS,
ISD::FEXP, ISD::STRICT_FEXP,
ISD::FFLOOR, ISD::STRICT_FFLOOR,
ISD::FREM, ISD::STRICT_FREM,
ISD::FLOG, ISD::STRICT_FLOG,
ISD::FLOG10, ISD::STRICT_FLOG10,
ISD::FPOW, ISD::STRICT_FPOW,
ISD::FSIN, ISD::STRICT_FSIN})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
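// The net effect mirrors what the CRT headers do by hand; a promoted
// fmodf call conceptually becomes:
//   float r = (float)fmod((double)x, (double)y);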
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
ISD::INSERT_VECTOR_ELT,
ISD::EXTRACT_VECTOR_ELT,
ISD::CONCAT_VECTORS,
ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR,
ISD::BITCAST,
ISD::VSELECT,
ISD::SELECT,
ISD::SHL,
ISD::SRA,
ISD::SRL,
ISD::OR,
ISD::AND,
ISD::ADD,
ISD::FADD,
ISD::FSUB,
ISD::FNEG,
ISD::FMA,
ISD::STRICT_FMA,
ISD::FMINNUM,
ISD::FMAXNUM,
ISD::SUB,
ISD::LOAD,
ISD::MLOAD,
ISD::STORE,
ISD::MSTORE,
ISD::TRUNCATE,
ISD::ZERO_EXTEND,
ISD::ANY_EXTEND,
ISD::SIGN_EXTEND,
ISD::SIGN_EXTEND_INREG,
ISD::ANY_EXTEND_VECTOR_INREG,
ISD::SIGN_EXTEND_VECTOR_INREG,
ISD::ZERO_EXTEND_VECTOR_INREG,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
ISD::SETCC,
ISD::MUL,
ISD::XOR,
ISD::MSCATTER,
ISD::MGATHER,
ISD::FP16_TO_FP,
ISD::FP_EXTEND,
ISD::STRICT_FP_EXTEND,
ISD::FP_ROUND,
ISD::STRICT_FP_ROUND});
computeRegisterProperties(Subtarget.getRegisterInfo());
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
// TODO: These control memcmp expansion in CGP and could be raised higher, but
// that needs to be benchmarked and balanced with the potential use of vector
// load/store types (PR33329, PR33914).
MaxLoadsPerMemcmp = 2;
MaxLoadsPerMemcmpOptSize = 2;
// Default loop alignment, which can be overridden by -align-loops.
setPrefLoopAlignment(Align(16));
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(Align(16));
verifyIntrinsicTables();
// Default to having -disable-strictnode-mutation on
IsStrictFPEnabled = true;
}
// This has so far only been implemented for 64-bit MachO.
bool X86TargetLowering::useLoadStackGuardNode() const {
return Subtarget.isTargetMachO() && Subtarget.is64Bit();
}
bool X86TargetLowering::useStackGuardXorFP() const {
// Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
}
SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL) const {
EVT PtrTy = getPointerTy(DAG.getDataLayout());
unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
return SDValue(Node, 0);
}
TargetLoweringBase::LegalizeTypeAction
X86TargetLowering::getPreferredVectorAction(MVT VT) const {
if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
!Subtarget.hasBWI())
return TypeSplitVector;
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
!Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
return TypeSplitVector;
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
VT.getVectorElementType() != MVT::i1)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
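// For example, by the rules above: v32i1 with AVX512F but no BWI is split
// into two v16i1 halves, while an odd-sized type such as v3f32 is typically
// widened to v4f32 rather than split or scalarized.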
static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
const X86Subtarget &Subtarget) {
// v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
// convention is one that uses k registers.
if (NumElts == 2)
return {MVT::v2i64, 1};
if (NumElts == 4)
return {MVT::v4i32, 1};
if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
CC != CallingConv::Intel_OCL_BI)
return {MVT::v8i16, 1};
if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
CC != CallingConv::Intel_OCL_BI)
return {MVT::v16i8, 1};
// v32i1 passes in ymm unless we have BWI and the calling convention is
// regcall.
if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
return {MVT::v32i8, 1};
// Split v64i1 vectors if we don't have v64i8 available.
if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
if (Subtarget.useAVX512Regs())
return {MVT::v64i8, 1};
return {MVT::v32i8, 2};
}
// Break wide or odd vXi1 vectors into scalars to match AVX2 behavior.
if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
NumElts > 64)
return {MVT::i8, NumElts};
return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}
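// Two concrete cases that fall out of the rules above: under the C calling
// convention a v8i1 argument travels as a single v8i16 in an xmm register,
// and v64i1 with BWI but without 512-bit registers is split into two v32i8
// halves.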
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (VT.isVector()) {
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
unsigned NumElts = VT.getVectorNumElements();
MVT RegisterVT;
unsigned NumRegisters;
std::tie(RegisterVT, NumRegisters) =
handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
return RegisterVT;
}
if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
return MVT::v8f16;
}
// We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
!Subtarget.hasX87())
return MVT::i32;
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
return getRegisterTypeForCallingConv(Context, CC,
VT.changeVectorElementTypeToInteger());
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (VT.isVector()) {
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
unsigned NumElts = VT.getVectorNumElements();
MVT RegisterVT;
unsigned NumRegisters;
std::tie(RegisterVT, NumRegisters) =
handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
return NumRegisters;
}
if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
return 1;
}
// We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
// x87 is disabled.
if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
if (VT == MVT::f64)
return 2;
if (VT == MVT::f80)
return 3;
}
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
return getNumRegistersForCallingConv(Context, CC,
VT.changeVectorElementTypeToInteger());
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
// Break wide or odd vXi1 vectors into scalars to match AVX2 behavior.
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
Subtarget.hasAVX512() &&
(!isPowerOf2_32(VT.getVectorNumElements()) ||
(VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
VT.getVectorNumElements() > 64)) {
RegisterVT = MVT::i8;
IntermediateVT = MVT::i1;
NumIntermediates = VT.getVectorNumElements();
return NumIntermediates;
}
// Split v64i1 vectors if we don't have v64i8 available.
if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
CC != CallingConv::X86_RegCall) {
RegisterVT = MVT::v32i8;
IntermediateVT = MVT::v32i1;
NumIntermediates = 2;
return 2;
}
return TargetLowering::getVectorTypeBreakdownForCallingConv(
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext& Context,
EVT VT) const {
if (!VT.isVector())
return MVT::i8;
if (Subtarget.hasAVX512()) {
// Figure out what this type will be legalized to.
EVT LegalVT = VT;
while (getTypeAction(Context, LegalVT) != TypeLegal)
LegalVT = getTypeToTransformTo(Context, LegalVT);
// If we got a 512-bit vector then we'll definitely have a vXi1 compare.
if (LegalVT.getSimpleVT().is512BitVector())
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
// If we legalized to less than a 512-bit vector, then we will use a vXi1
// compare for vXi32/vXi64 for sure. If we have BWI we will also support
// vXi16/vXi8.
MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
}
}
return VT.changeVectorElementTypeToInteger();
}
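// For example, with AVX512VL a v8i32 compare produces v8i1 here, while
// without AVX512 the same compare falls through to the default below and
// produces v8i32.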
/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
if (MaxAlign == 16)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
MaxAlign = Align(16);
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Align EltAlign;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
Align EltAlign;
getMaxByValAlign(EltTy, EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
break;
}
}
}
/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
if (Subtarget.is64Bit()) {
// Max of 8 and alignment of type.
Align TyAlign = DL.getABITypeAlign(Ty);
if (TyAlign > 8)
return TyAlign.value();
return 8;
}
Align Alignment(4);
if (Subtarget.hasSSE1())
getMaxByValAlign(Ty, Alignment);
return Alignment.value();
}
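// For example, on 32-bit with SSE1 a struct containing a 128-bit vector
// member gets a 16-byte boundary from the walk above; aggregates without
// SSE vectors stay at 4 bytes.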
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
// FIXME: Check if unaligned 64-byte accesses are slow.
if (Op.size() >= 64 && Subtarget.hasAVX512() &&
(Subtarget.getPreferVectorWidth() >= 512)) {
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
}
// FIXME: Check if unaligned 32-byte accesses are slow.
if (Op.size() >= 32 && Subtarget.hasAVX() &&
Subtarget.useLight256BitInstructions()) {
// Although this isn't a well-supported type for AVX1, we'll let
// legalization and shuffle lowering produce the optimal codegen. If we
// choose an optimal type with a vector element larger than a byte,
// getMemsetStores() may create an intermediate splat (using an integer
// multiply) before we splat as a vector.
return MVT::v32i8;
}
if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
// If we have SSE1 registers we should be able to use them.
if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
(Subtarget.getPreferVectorWidth() >= 128))
return MVT::v4f32;
} else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
// Also, do not use f64 to lower memset unless this is a memset of zeros.
// The gymnastics of splatting a byte value into an XMM register and then
// only using 8-byte stores (because this is a CPU with slow unaligned
// 16-byte accesses) makes that a loser.
return MVT::f64;
}
}
// This is a compromise. If we reach here, unaligned accesses may be slow on
// this target. However, creating smaller, aligned accesses could be even
// slower and would certainly be a lot more code.
if (Subtarget.is64Bit() && Op.size() >= 8)
return MVT::i64;
return MVT::i32;
}
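// For example, a 64-byte memset on an AVX512BW target that prefers 512-bit
// vectors picks v64i8 above, while the same request on a plain SSE2 target
// picks v16i8.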
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
if (VT == MVT::f32)
return Subtarget.hasSSE1();
if (VT == MVT::f64)
return Subtarget.hasSSE2();
return true;
}
static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
return (8 * Alignment.value()) % SizeInBits == 0;
}
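// E.g. isBitAligned(Align(16), 128) is true since (8 * 16) % 128 == 0,
// while isBitAligned(Align(4), 128) is false since 32 % 128 != 0.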
bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
if (isBitAligned(Alignment, VT.getSizeInBits()))
return true;
switch (VT.getSizeInBits()) {
default:
// 8-byte and under are always assumed to be fast.
return true;
case 128:
return !Subtarget.isUnalignedMem16Slow();
case 256:
return !Subtarget.isUnalignedMem32Slow();
// TODO: What about AVX-512 (512-bit) accesses?
}
}
bool X86TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
unsigned *Fast) const {
if (Fast)
*Fast = isMemoryAccessFast(VT, Alignment);
// NonTemporal vector memory ops must be aligned.
if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
// NT loads can only be vector aligned, so if it's less aligned than the
// minimum vector size (which we can split the vector down to), we might as
// well use a regular unaligned vector load.
// We don't have any NT loads pre-SSE41.
if (!!(Flags & MachineMemOperand::MOLoad))
return (Alignment < 16 || !Subtarget.hasSSE41());
return false;
}
// Misaligned accesses of any size are always allowed.
return true;
}
bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,
unsigned *Fast) const {
if (Fast)
*Fast = isMemoryAccessFast(VT, Alignment);
if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
/*Fast=*/nullptr))
return true;
// NonTemporal vector memory ops are special, and must be aligned.
if (!isBitAligned(Alignment, VT.getSizeInBits()))
return false;
switch (VT.getSizeInBits()) {
case 128:
if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
return true;
if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
return true;
return false;
case 256:
if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
return true;
if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
return true;
return false;
case 512:
if (Subtarget.hasAVX512())
return true;
return false;
default:
return false; // Don't have NonTemporal vector memory ops of this size.
}
}
return true;
}
/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (isPositionIndependent() && Subtarget.isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
bool X86TargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();
if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
unsigned ValueBits = ValueVT.getSizeInBits();
unsigned PartBits = PartVT.getSizeInBits();
Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
Parts[0] = Val;
return true;
}
return false;
}
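// The bf16-in-f32 shuffle above keeps the value in an SSE register as the
// ABI expects: bitcast bf16 to i16, any-extend to i32, bitcast to f32.
// joinRegisterPartsIntoValue below reverses those steps.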
SDValue X86TargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();
if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
unsigned ValueBits = ValueVT.getSizeInBits();
unsigned PartBits = PartVT.getSizeInBits();
SDValue Val = Parts[0];
Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
return Val;
}
return SDValue();
}
bool X86TargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
ArgListTy &Args) const {
// Only relabel X86-32 for C / Stdcall CCs.
if (Subtarget.is64Bit())
return;
if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
return;
unsigned ParamRegs = 0;
if (auto *M = MF->getFunction().getParent())
ParamRegs = M->getNumberRegisterParameters();
// Mark the first N integer arguments as being passed in registers.
for (auto &Arg : Args) {
Type *T = Arg.Ty;
if (T->isIntOrPtrTy())
if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
unsigned numRegs = 1;
if (MF->getDataLayout().getTypeAllocSize(T) > 4)
numRegs = 2;
if (ParamRegs < numRegs)
return;
ParamRegs -= numRegs;
Arg.IsInReg = true;
}
}
}
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid, MCContext &Ctx) const {
assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
return MCSymbolRefExpr::create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget.is64Bit())
// This doesn't have SDLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()));
return Table;
}
/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx) const {
// X86-64 uses RIP relative addressing based on the jump table label.
if (Subtarget.isPICStyleRIPRel())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}
std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(TRI, VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
break;
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
case MVT::v8f32: case MVT::v4f64:
case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
case MVT::v16f32: case MVT::v8f64:
RRC = &X86::VR128XRegClass;
break;
}
return std::make_pair(RRC, Cost);
}
unsigned X86TargetLowering::getAddressSpace() const {
if (Subtarget.is64Bit())
return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
return 256;
}
static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
(TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}
static Constant* SegmentOffset(IRBuilderBase &IRB,
int Offset, unsigned AddressSpace) {
return ConstantExpr::getIntToPtr(
ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
}
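// E.g. SegmentOffset(IRB, 0x28, 257 /*X86AS::FS*/) builds roughly
//   inttoptr (i32 40 to i8* addrspace(257)*)
// i.e. the %fs:0x28 stack-guard slot referenced below.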
Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
// glibc, bionic, and Fuchsia have a special slot for the stack guard in
// tcbhead_t; use it instead of the usual global variable (see
// sysdeps/{i386,x86_64}/nptl/tls.h)
if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
if (Subtarget.isTargetFuchsia()) {
// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
return SegmentOffset(IRB, 0x10, getAddressSpace());
} else {
unsigned AddressSpace = getAddressSpace();
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
// In particular, some users may customize the base register and offset.
int Offset = M->getStackProtectorGuardOffset();
// If we don't set -stack-protector-guard-offset value:
// %fs:0x28, unless we're using a Kernel code model, in which case
// it's %gs:0x28. %gs:0x14 on i386.
if (Offset == INT_MAX)
Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
StringRef GuardReg = M->getStackProtectorGuardReg();
if (GuardReg == "fs")
AddressSpace = X86AS::FS;
else if (GuardReg == "gs")
AddressSpace = X86AS::GS;
// Use the symbol guard if the user specifies one.
StringRef GuardSymb = M->getStackProtectorGuardSymbol();
if (!GuardSymb.empty()) {
GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
if (!GV) {
Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
: Type::getInt32Ty(M->getContext());
GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
nullptr, GuardSymb, nullptr,
GlobalValue::NotThreadLocal, AddressSpace);
}
return GV;
}
return SegmentOffset(IRB, Offset, AddressSpace);
}
}
return TargetLowering::getIRStackGuard(IRB);
}
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
// The MSVC CRT has a global variable holding the security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
// The MSVC CRT has a function to validate the security cookie.
FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
"__security_check_cookie", Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::X86_FastCall);
F->addParamAttr(0, Attribute::AttrKind::InReg);
}
return;
}
StringRef GuardMode = M.getStackProtectorGuard();
// glibc, bionic, and Fuchsia have a special slot for the stack guard.
if ((GuardMode == "tls" || GuardMode.empty()) &&
hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
return;
TargetLowering::insertSSPDeclarations(M);
}
Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
// The MSVC CRT has a global variable holding the security cookie.
if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
return M.getGlobalVariable("__security_cookie");
}
return TargetLowering::getSDagStackGuard(M);
}
Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// The MSVC CRT has a function to validate the security cookie.
if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
return M.getFunction("__security_check_cookie");
}
return TargetLowering::getSSPStackGuardCheck(M);
}
Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
if (Subtarget.getTargetTriple().isOSContiki())
return getDefaultSafeStackPointerLocation(IRB, false);
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget.isTargetAndroid()) {
// %fs:0x48 on x86-64 (%gs:0x48 under the kernel code model);
// %gs:0x24 on i386.
int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
return SegmentOffset(IRB, Offset, getAddressSpace());
}
// Fuchsia is similar.
if (Subtarget.isTargetFuchsia()) {
// <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
return SegmentOffset(IRB, 0x18, getAddressSpace());
}
return TargetLowering::getSafeStackPointerLocation(IRB);
}
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
bool X86TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
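// R11 is caller-saved and not used for argument passing, so it can safely
// be clobbered around calls; the list is null-terminated.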
const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
return ScratchRegs;
}
/// Lowers mask values (v*i1) to the local register values.
/// \returns the DAG node after lowering to the register type.
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
const SDLoc &Dl, SelectionDAG &DAG) {
EVT ValVT = ValArg.getValueType();
if (ValVT == MVT::v1i1)
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
DAG.getIntPtrConstant(0, Dl));
if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
// Two-stage lowering might be required:
// bitcast: v8i1 -> i8 / v16i1 -> i16
// anyextend: i8 -> i32 / i16 -> i32
EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
if (ValLoc == MVT::i32)
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
return ValToCopy;
}
if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
(ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
// One-stage lowering is required:
// bitcast: v32i1 -> i32 / v64i1 -> i64
return DAG.getBitcast(ValLoc, ValArg);
}
return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
}
/// Breaks a v64i1 value into two registers and adds the new nodes to the DAG.
static void Passv64i1ArgInRegs(
const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
CCValAssign &NextVA, const X86Subtarget &Subtarget) {
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
"The value should reside in two registers");
// Before splitting the value we cast it to i64
Arg = DAG.getBitcast(MVT::i64, Arg);
// Splitting the value into two i32 types
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
DAG.getConstant(0, Dl, MVT::i32));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
DAG.getConstant(1, Dl, MVT::i32));
// Attach the two i32 values to the corresponding registers.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}
SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
// In some cases we need to disable registers from the default CSR list.
// For example, when they are used for argument passing.
bool ShouldDisableCalleeSavedRegister =
CallConv == CallingConv::X86_RegCall ||
MF.getFunction().hasFnAttribute("no_caller_saved_registers");
if (CallConv == CallingConv::X86_INTR && !Outs.empty())
report_fatal_error("X86 interrupts may not return any value");
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
SmallVector<std::pair<Register, SDValue>, 4> RetVals;
for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
++I, ++OutsIndex) {
CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// Add the register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
SDValue ValToCopy = OutVals[OutsIndex];
EVT ValVT = ValToCopy.getValueType();
// Promote values to the appropriate types.
if (VA.getLocInfo() == CCValAssign::SExt)
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::ZExt)
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::AExt) {
if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
else
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
}
else if (VA.getLocInfo() == CCValAssign::BCvt)
ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
assert(VA.getLocInfo() != CCValAssign::FPExt &&
"Unexpected FP-extend for return value.");
// Report an error if we have attempted to return a value via an XMM
// register and SSE was disabled.
if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
} else if (!Subtarget.hasSSE2() &&
X86::FR64XRegClass.contains(VA.getLocReg()) &&
ValVT == MVT::f64) {
// When returning a double via an XMM register, report an error if SSE2 is
// not enabled.
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
if (VA.getLocReg() == X86::FP0 ||
VA.getLocReg() == X86::FP1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
// Don't emit a copytoreg.
continue;
}
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget.is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget.hasSSE2())
ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
}
}
}
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
Subtarget);
// Add the second register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
} else {
RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
}
}
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
MVT::i32));
// Copy the result values into the output registers.
for (auto &RetVal : RetVals) {
if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
RetOps.push_back(RetVal.second);
continue; // Don't emit a copytoreg.
}
Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// The Swift calling convention does not require that we copy the sret
// argument into %rax/%eax for the return, and SRetReturnReg is not set for
// Swift. All x86 ABIs require that, when returning a struct by value, we
// copy the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
//
// Checking Function.hasStructRetAttr() here is insufficient because the IR
// may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
// false, then an sret argument may be implicitly inserted in the SelDAG. In
// either case FuncInfo->setSRetReturnReg() will have been called.
if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
// When we have both sret and another return value, we should use the
// original Chain stored in RetOps[0], instead of the current Chain updated
// in the above loop. If we only have sret, RetOps[0] equals to Chain.
// For the case of sret and another return value, we have
// Chain_0 at the function entry
// Chain_1 = getCopyToReg(Chain_0) in the above loop
// If we use Chain_1 in getCopyFromReg, we will have
// Val = getCopyFromReg(Chain_1)
// Chain_2 = getCopyToReg(Chain_1, Val) from below
// getCopyToReg(Chain_0) will be glued together with
// getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
// in Unit B, and we will have cyclic dependency between Unit A and Unit B:
// Data dependency from Unit B to Unit A due to usage of Val in
// getCopyToReg(Chain_1, Val)
// Chain dependency from Unit A to Unit B
// So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
getPointerTy(MF.getDataLayout()));
Register RetValReg
= (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
X86::RAX : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
// RAX/EAX now acts like a return value.
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
// Add the returned register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
}
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (X86::GR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
X86ISD::NodeType opcode = X86ISD::RET_FLAG;
if (CallConv == CallingConv::X86_INTR)
opcode = X86ISD::IRET;
return DAG.getNode(opcode, dl, MVT::Other, RetOps);
}
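// Returns true if the only use of N (possibly through a CopyToReg or an
// FP_EXTEND) is a RET_FLAG, i.e. a call producing N could be folded into a
// tail call; on success, Chain is updated to the chain feeding that copy.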
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (const SDNode *U : Copy->uses()) {
if (U->getOpcode() != X86ISD::RET_FLAG)
return false;
// If we are returning more than one value, we can definitely not make a
// tail call; see PR19530.
if (U->getNumOperands() > 4)
return false;
if (U->getNumOperands() == 4 &&
U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
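// Choose the minimum type a small extended return value is widened to; see
// the Darwin note below for why i8/i16 may still be widened to i32.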
EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT = MVT::i32;
bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
// The ABI does not require i1, i8 or i16 to be extended.
//
// On Darwin, there is code in the wild relying on Clang's old behaviour of
// always extending i8/i16 return values, so keep doing that for now.
// (PR26665).
ReturnMVT = MVT::i8;
}
EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
/// Reads two 32-bit registers and creates a 64-bit mask value.
/// \param VA The current 32-bit value that needs to be assigned.
/// \param NextVA The next 32-bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InFlag Represents the glue SDValue in the parent DAG node.
///                        If the DAG is already using a physical register
///                        instead of a virtual one, we glue our new SDValue
///                        to the InFlag SDValue.
/// \return a new 64-bit SDValue.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
const SDLoc &Dl, const X86Subtarget &Subtarget,
SDValue *InFlag = nullptr) {
assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
assert(VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type");
assert(NextVA.getValVT() == VA.getValVT() &&
"The locations should have the same type");
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
"The values should reside in two registers");
SDValue Lo, Hi;
SDValue ArgValueLo, ArgValueHi;
MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterClass *RC = &X86::GR32RegClass;
// Read a 32 bit value from the registers.
if (nullptr == InFlag) {
// When no physical register is present,
// create an intermediate virtual register.
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
} else {
// When a physical register is available read the value from it and glue
// the reads together.
ArgValueLo =
DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
*InFlag = ArgValueLo.getValue(2);
ArgValueHi =
DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
*InFlag = ArgValueHi.getValue(2);
}
// Convert the i32 type into v32i1 type.
Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
// Convert the i32 type into v32i1 type.
Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
// Concatenate the two values together.
return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
}
/// Lowers a register of various sizes (8/16/32/64)
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
/// \returns a DAG node containing the operand after lowering to the mask type.
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
const EVT &ValLoc, const SDLoc &Dl,
SelectionDAG &DAG) {
SDValue ValReturned = ValArg;
if (ValVT == MVT::v1i1)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
if (ValVT == MVT::v64i1) {
// On 32-bit targets this case is handled by getv64i1Argument.
assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
// On 64-bit targets there is no need to truncate the value, only to bitcast.
} else {
MVT maskLen;
switch (ValVT.getSimpleVT().SimpleTy) {
case MVT::v8i1:
maskLen = MVT::i8;
break;
case MVT::v16i1:
maskLen = MVT::i16;
break;
case MVT::v32i1:
maskLen = MVT::i32;
break;
default:
llvm_unreachable("Expecting a vector of i1 types");
}
ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
}
return DAG.getBitcast(ValVT, ValReturned);
}
/// Lower the result values of a call into the
/// appropriate copies out of physical registers.
///
SDValue X86TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
uint32_t *RegMask) const {
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
++I, ++InsIndex) {
CCValAssign &VA = RVLocs[I];
EVT CopyVT = VA.getLocVT();
// In some calling conventions we need to remove the used registers
// from the register mask.
if (RegMask) {
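// The mask stores one bit per register, packed 32 per word; clearing a
// bit marks that register (and its sub-registers) as not preserved across
// the call.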
for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
}
// Report an error if there was an attempt to return FP values via XMM
// registers.
if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
if (VA.getLocReg() == X86::XMM1)
VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
else
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
} else if (!Subtarget.hasSSE2() &&
X86::FR64XRegClass.contains(VA.getLocReg()) &&
CopyVT == MVT::f64) {
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
if (VA.getLocReg() == X86::XMM1)
VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
else
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
bool RoundAfterCopy = false;
if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
if (!Subtarget.hasX87())
report_fatal_error("X87 register return with X87 disabled");
CopyVT = MVT::f80;
RoundAfterCopy = (CopyVT != VA.getLocVT());
}
SDValue Val;
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
Val =
getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
} else {
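// getCopyFromReg with glue produces (value, chain, glue); peel off each
// result so the glue threads into the next copy.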
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
.getValue(1);
Val = Chain.getValue(0);
InFlag = Chain.getValue(2);
}
if (RoundAfterCopy)
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
if (VA.isExtInLoc()) {
if (VA.getValVT().isVector() &&
VA.getValVT().getScalarType() == MVT::i1 &&
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
// Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8.
Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
} else
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
}
if (VA.getLocInfo() == CCValAssign::BCvt)
Val = DAG.getBitcast(VA.getValVT(), Val);
InVals.push_back(Val);
}
return Chain;
}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
// The StdCall calling convention is standard for many Windows API routines.
// It differs from the C calling convention just a little: the callee cleans
// up the stack, not the caller, and symbols are decorated in some fancy
// way :) It doesn't support any vector arguments.
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
/// Determines whether Args, either a set of outgoing arguments to a call, or a
/// set of incoming args of a call, contains an sret pointer that the callee
/// pops.
template <typename T>
static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
const X86Subtarget &Subtarget) {
// Not C++20 (yet), so no concepts available.
static_assert(std::is_same_v<T, ISD::OutputArg> ||
std::is_same_v<T, ISD::InputArg>,
"requires ISD::OutputArg or ISD::InputArg");
// Only 32-bit targets pop the sret. It's a 64-bit world these days, so
// early-out for most compilations.
if (!Subtarget.is32Bit())
return false;
if (Args.empty())
return false;
// Most calls do not have an sret argument; check the first arg next.
const ISD::ArgFlagsTy &Flags = Args[0].Flags;
if (!Flags.isSRet() || Flags.isInReg())
return false;
// The MSVC ABI does not pop the sret.
if (Subtarget.getTargetTriple().isOSMSVCRT())
return false;
// MCU targets don't pop the sret.
if (Subtarget.isTargetMCU())
return false;
// The callee pops the sret argument.
return true;
}
/// Make a copy of an aggregate at the address specified by "Src" to the
/// address "Dst", with size and alignment information specified by the
/// byval parameter attribute. The copy will be passed as a byval function
/// parameter.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
return DAG.getMemcpy(
Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
/*isVolatile*/ false, /*AlwaysInline=*/true,
/*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
CC == CallingConv::HHVM || CC == CallingConv::Tail ||
CC == CallingConv::SwiftTail);
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
// C calling conventions:
case CallingConv::C:
case CallingConv::Win64:
case CallingConv::X86_64_SysV:
// Callee pop conventions:
case CallingConv::X86_ThisCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_VectorCall:
case CallingConv::X86_FastCall:
// Swift:
case CallingConv::Swift:
return true;
default:
return canGuaranteeTCO(CC);
}
}
/// Return true if the function is being made into a tailcall target by
/// changing its ABI.
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!CI->isTailCall())
return false;
CallingConv::ID CalleeCC = CI->getCallingConv();
if (!mayTailCallThisCC(CalleeCC))
return false;
return true;
}
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo &MFI, unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = shouldGuaranteeTCO(
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
MVT PtrVT = getPointerTy(DAG.getDataLayout());
// If the value is passed by pointer, we receive its address instead of the
// value itself. There is no need to extend if the mask value and its
// location share the same absolute size.
bool ExtendedInMem =
VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
ValVT = VA.getLocVT();
else
ValVT = VA.getValVT();
// FIXME: For now, all byval parameter objects are marked mutable. This can
// be changed with more analysis.
// In case of tail call optimization, mark all arguments mutable, since they
// could be overwritten when lowering the arguments of a tail call.
if (Flags.isByVal()) {
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
// FIXME: For now, all byval parameter objects are marked as aliasing. This
// can be improved with deeper analysis.
int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
/*isAliased=*/true);
return DAG.getFrameIndex(FI, PtrVT);
}
EVT ArgVT = Ins[i].ArgVT;
// If this is a vector that has been split into multiple parts, and the
// scalar size of the parts doesn't match the vector element size, then we can't
// elide the copy. The parts will have padding between them instead of being
// packed like a vector.
bool ScalarizedAndExtendedVector =
ArgVT.isVector() && !VA.getLocVT().isVector() &&
VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
// This is an argument in memory. We might be able to perform copy elision.
// If the argument is passed directly in memory without any extension, then we
// can perform copy elision. Large vector types, for example, may be passed
// indirectly by pointer.
if (Flags.isCopyElisionCandidate() &&
VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
!ScalarizedAndExtendedVector) {
SDValue PartAddr;
if (Ins[i].PartOffset == 0) {
// If this is a one-part value or the first part of a multi-part value,
// create a stack object for the entire argument value type and return a
// load from our portion of it. This assumes that if the first part of an
// argument is in memory, the rest will also be in memory.
int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
/*IsImmutable=*/false);
PartAddr = DAG.getFrameIndex(FI, PtrVT);
return DAG.getLoad(
ValVT, dl, Chain, PartAddr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
} else {
// This is not the first piece of an argument in memory. See if there is
// already a fixed stack object including this offset. If so, assume it
// was created by the PartOffset == 0 branch above and create a load from
// the appropriate offset into it.
int64_t PartBegin = VA.getLocMemOffset();
int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
int FI = MFI.getObjectIndexBegin();
for (; MFI.isFixedObjectIndex(FI); ++FI) {
int64_t ObjBegin = MFI.getObjectOffset(FI);
int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
break;
}
if (MFI.isFixedObjectIndex(FI)) {
SDValue Addr =
DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
return DAG.getLoad(
ValVT, dl, Chain, Addr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
Ins[i].PartOffset));
}
}
}
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
VA.getLocMemOffset(), isImmutable);
// Set SExt or ZExt flag.
if (VA.getLocInfo() == CCValAssign::ZExt) {
MFI.setObjectZExt(FI, true);
} else if (VA.getLocInfo() == CCValAssign::SExt) {
MFI.setObjectSExt(FI, true);
}
MaybeAlign Alignment;
if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
ValVT != MVT::f80)
Alignment = MaybeAlign(4);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
Alignment);
return ExtendedInMem
? (VA.getValVT().isVector()
? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
: DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
: Val;
}
// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
const X86Subtarget &Subtarget) {
assert(Subtarget.is64Bit());
if (Subtarget.isCallingConvWin64(CallConv)) {
static const MCPhysReg GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
}
static const MCPhysReg GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
}
// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
CallingConv::ID CallConv,
const X86Subtarget &Subtarget) {
assert(Subtarget.is64Bit());
if (Subtarget.isCallingConvWin64(CallConv)) {
// The XMM registers which might contain var arg parameters are shadowed
// in their paired GPRs. So we only need to save the GPRs to their home
// slots.
// TODO: __vectorcall will change this.
return std::nullopt;
}
bool isSoftFloat = Subtarget.useSoftFloat();
if (isSoftFloat || !Subtarget.hasSSE1())
// Kernel mode asks for SSE to be disabled, so there are no XMM argument
// registers.
return std::nullopt;
static const MCPhysReg XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
}
#ifndef NDEBUG
static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
return llvm::is_sorted(
ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
return A.getValNo() < B.getValNo();
});
}
#endif
namespace {
/// This is a helper class for lowering variable argument parameters.
class VarArgsLoweringHelper {
public:
VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
SelectionDAG &DAG, const X86Subtarget &Subtarget,
CallingConv::ID CallConv, CCState &CCInfo)
: FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
TheMachineFunction(DAG.getMachineFunction()),
TheFunction(TheMachineFunction.getFunction()),
FrameInfo(TheMachineFunction.getFrameInfo()),
FrameLowering(*Subtarget.getFrameLowering()),
TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
CCInfo(CCInfo) {}
// Lower variable argument parameters.
void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
private:
void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
void forwardMustTailParameters(SDValue &Chain);
bool is64Bit() const { return Subtarget.is64Bit(); }
bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
X86MachineFunctionInfo *FuncInfo;
const SDLoc &DL;
SelectionDAG &DAG;
const X86Subtarget &Subtarget;
MachineFunction &TheMachineFunction;
const Function &TheFunction;
MachineFrameInfo &FrameInfo;
const TargetFrameLowering &FrameLowering;
const TargetLowering &TargLowering;
CallingConv::ID CallConv;
CCState &CCInfo;
};
} // namespace
void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
SDValue &Chain, unsigned StackSize) {
// If the function takes a variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start. We
// can skip this if there are no va_start calls.
if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(
FrameInfo.CreateFixedObject(1, StackSize, true));
}
// 64-bit calling conventions support varargs and register parameters, so we
// have to do extra work to spill them in the prologue.
if (is64Bit()) {
// Find the first unallocated argument register in each set.
ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
ArrayRef<MCPhysReg> ArgXMMs =
get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
if (isWin64()) {
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
// Fix up the vararg frame index to point into the shadow area (4 x i64).
if (NumIntRegs < 4)
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so
// they may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
}
// SDValues for the GPR registers holding live input values.
SmallVector<SDValue, 6> LiveGPRs;
// SDValues for the XMM registers holding live input values.
SmallVector<SDValue, 8> LiveXMMRegs;
// If applicable, holds the SDValue for the %al register.
SDValue ALVal;
// Gather all the live in physical registers.
for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
}
const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
if (!AvailableXmms.empty()) {
Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
for (MCPhysReg Reg : AvailableXmms) {
// The fast register allocator spills virtual registers at basic block
// boundaries. That leads to uses of XMM registers outside of the check
// for %al. Pass physical registers to VASTART_SAVE_XMM_REGS to avoid
// unnecessary spilling.
TheMachineFunction.getRegInfo().addLiveIn(Reg);
LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
}
}
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN =
DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
TargLowering.getPointerTy(DAG.getDataLayout()));
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (SDValue Val : LiveGPRs) {
SDValue FIN = DAG.getNode(ISD::ADD, DL,
TargLowering.getPointerTy(DAG.getDataLayout()),
RSFIN, DAG.getIntPtrConstant(Offset, DL));
SDValue Store =
DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(),
FuncInfo->getRegSaveFrameIndex(), Offset));
MemOps.push_back(Store);
Offset += 8;
}
// Now store the XMM (fp + vector) parameter registers.
if (!LiveXMMRegs.empty()) {
SmallVector<SDValue, 12> SaveXMMOps;
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(RSFIN);
SaveXMMOps.push_back(
DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
llvm::append_range(SaveXMMOps, LiveXMMRegs);
MachineMemOperand *StoreMMO =
DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
Offset),
MachineMemOperand::MOStore, 128, Align(16));
MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
DL, DAG.getVTList(MVT::Other),
SaveXMMOps, MVT::i8, StoreMMO));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
}
void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
// Find the largest legal vector type.
MVT VecVT = MVT::Other;
// FIXME: Only some x86_32 calling conventions support AVX512.
if (Subtarget.useAVX512Regs() &&
(is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
CallConv == CallingConv::Intel_OCL_BI)))
VecVT = MVT::v16f32;
else if (Subtarget.hasAVX())
VecVT = MVT::v8f32;
else if (Subtarget.hasSSE2())
VecVT = MVT::v4f32;
// We forward some GPRs and some vector types.
SmallVector<MVT, 2> RegParmTypes;
MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
RegParmTypes.push_back(IntVT);
if (VecVT != MVT::Other)
RegParmTypes.push_back(VecVT);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
// Forward AL for SysV x86_64 targets, since it is used for varargs.
if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
}
// Copy all forwards from physical to virtual registers.
for (ForwardedRegister &FR : Forwards) {
// FIXME: Can we use a less constrained schedule?
SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
TargLowering.getRegClassFor(FR.VT));
Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
}
}
void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
unsigned StackSize) {
// Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
// If necessary, it will be set to the correct value later.
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
if (FrameInfo.hasVAStart())
createVarArgAreaAndStoreRegisters(Chain, StackSize);
if (FrameInfo.hasMustTailInVarArgFunc())
forwardMustTailParameters(Chain);
}
SDValue X86TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function &F = MF.getFunction();
if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
F.getName() == "main")
FuncInfo->setForceFramePointer(true);
MachineFrameInfo &MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
assert(
!(IsVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64.
if (IsWin64)
CCInfo.AllocateStack(32, Align(8));
CCInfo.AnalyzeArguments(Ins, CC_X86);
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
if (CallingConv::X86_VectorCall == CallConv) {
CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
}
// The next loop assumes that the locations are in the same order as the
// input arguments.
assert(isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering");
SDValue ArgValue;
for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
++I, ++InsIndex) {
assert(InsIndex < Ins.size() && "Invalid Ins index");
CCValAssign &VA = ArgLocs[I];
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
if (VA.needsCustom()) {
assert(
VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
// In the regcall calling convention on 32-bit targets, v64i1 values
// are split up into two registers.
ArgValue =
getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
} else {
const TargetRegisterClass *RC;
if (RegVT == MVT::i8)
RC = &X86::GR8RegClass;
else if (RegVT == MVT::i16)
RC = &X86::GR16RegClass;
else if (RegVT == MVT::i32)
RC = &X86::GR32RegClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = &X86::GR64RegClass;
else if (RegVT == MVT::f16)
RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
else if (RegVT == MVT::f32)
RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
else if (RegVT == MVT::f80)
RC = &X86::RFP80RegClass;
else if (RegVT == MVT::f128)
RC = &X86::VR128RegClass;
else if (RegVT.is512BitVector())
RC = &X86::VR512RegClass;
else if (RegVT.is256BitVector())
RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
else if (RegVT.is128BitVector())
RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
else if (RegVT == MVT::v1i1)
RC = &X86::VK1RegClass;
else if (RegVT == MVT::v8i1)
RC = &X86::VK8RegClass;
else if (RegVT == MVT::v16i1)
RC = &X86::VK16RegClass;
else if (RegVT == MVT::v32i1)
RC = &X86::VK32RegClass;
else if (RegVT == MVT::v64i1)
RC = &X86::VK64RegClass;
else
llvm_unreachable("Unknown argument type!");
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
else if (VA.getValVT().isVector() &&
VA.getValVT().getScalarType() == MVT::i1 &&
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
// Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
assert(VA.isMemLoc());
ArgValue =
LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
}
// If the value is passed via a pointer, do a load.
if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
ArgValue =
DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
InVals.push_back(ArgValue);
}
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
if (Ins[I].Flags.isSwiftAsync()) {
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit())
X86FI->setHasSwiftAsyncContext(true);
else {
int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
X86FI->setSwiftAsyncContextFrameIdx(FI);
SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
DAG.getFrameIndex(FI, MVT::i32),
MachinePointerInfo::getFixedStack(MF, FI));
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
}
}
// The Swift calling convention does not require that we copy the sret
// argument into %rax/%eax for the return. We don't set SRetReturnReg for
// Swift.
if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
continue;
// All x86 ABIs require that for returning structs by value we copy the
// sret argument into %rax/%eax (depending on ABI) for the return. Save
// the argument into a virtual register so that we can access it from the
// return points.
if (Ins[I].Flags.isSRet()) {
assert(!FuncInfo->getSRetReturnReg() &&
"SRet return has already been set");
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Register Reg =
MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
break;
}
}
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
if (shouldGuaranteeTCO(CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
if (IsVarArg)
VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
.lowerVarArgsParameters(Chain, StackSize);
// Some calling conventions require the callee to pop its arguments.
if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
// X86 interrupts must pop the error code (and the alignment padding) if
// present.
FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
FuncInfo->setBytesToPopOnReturn(4);
}
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
}
FuncInfo->setArgumentStackSize(StackSize);
if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
if (Personality == EHPersonality::CoreCLR) {
assert(Is64Bit);
// TODO: Add a mechanism to frame lowering that will allow us to indicate
// that we'd prefer this slot be allocated towards the bottom of the frame
// (i.e. near the stack pointer after allocating the frame). Every
// funclet needs a copy of this slot in its (mostly empty) frame, and the
// offset from the bottom of this and each funclet's frame must be the
// same, so the size of funclets' (mostly empty) frames is dictated by
// how far this slot is from the bottom (since they allocate just enough
// space to accommodate holding this slot at the correct offset).
int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
EHInfo->PSPSymFrameIdx = PSPSymFI;
}
}
if (CallConv == CallingConv::X86_RegCall ||
F.hasFnAttribute("no_caller_saved_registers")) {
MachineRegisterInfo &MRI = MF.getRegInfo();
for (std::pair<Register, Register> Pair : MRI.liveins())
MRI.disableCalleeSavedRegister(Pair.first);
}
return Chain;
}
SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
SDValue Arg, const SDLoc &dl,
SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags,
bool isByVal) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
if (isByVal)
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
MaybeAlign Alignment;
if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
Arg.getSimpleValueType() != MVT::f80)
Alignment = MaybeAlign(4);
return DAG.getStore(
Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
Alignment);
}
/// Emit a load of the return address if tail call
/// optimization is performed and it is required.
SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
bool Is64Bit, int FPDiff, const SDLoc &dl) const {
// Adjust the Return address stack slot.
EVT VT = getPointerTy(DAG.getDataLayout());
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
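// The load's result #1 is its output chain; return it so later operations
// are ordered after this load.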
return SDValue(OutRetAddr.getNode(), 1);
}
/// Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
SDValue Chain, SDValue RetAddrFrIdx,
EVT PtrVT, unsigned SlotSize,
int FPDiff, const SDLoc &dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int NewReturnAddrFI =
MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
false);
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), NewReturnAddrFI));
return Chain;
}
/// Returns a vector_shuffle mask for a movs{s|d} or movd
/// operation of the specified width.
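/// For example, with VT = v4i32 the mask is <4,1,2,3>: lane 0 is taken
/// from V2 and the remaining lanes from V1, matching movss semantics.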
static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
Mask.push_back(NumElems);
for (unsigned i = 1; i != NumElems; ++i)
Mask.push_back(i);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
CallingConv::ID CallConv = CLI.CallConv;
bool &isTailCall = CLI.IsTailCall;
bool isVarArg = CLI.IsVarArg;
const auto *CB = CLI.CB;
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
bool IsSibcall = false;
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
bool HasNCSR = (CB && isa<CallInst>(CB) &&
CB->hasFnAttr("no_caller_saved_registers"));
bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
bool IsCFICall = IsIndirectCall && CLI.CFIType;
const Module *M = MF.getMMI().getModule();
Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
MachineFunction::CallSiteInfo CSInfo;
if (CallConv == CallingConv::X86_INTR)
report_fatal_error("X86 interrupts may not be called directly");
bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
// relocation, which forces early binding of the symbol. This breaks code
// that requires lazy function symbol resolution. Using musttail or
// GuaranteedTailCallOpt will override this.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (!G || (!G->getGlobal()->hasLocalLinkage() &&
G->getGlobal()->hasDefaultVisibility()))
isTailCall = false;
}
if (isTailCall && !IsMustTail) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(
Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
if (!IsGuaranteeTCO && isTailCall)
IsSibcall = true;
if (isTailCall)
++NumTailCalls;
}
if (IsMustTail && !isTailCall)
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64.
if (IsWin64)
CCInfo.AllocateStack(32, Align(8));
CCInfo.AnalyzeArguments(Outs, CC_X86);
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
if (CallingConv::X86_VectorCall == CallConv) {
CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
if (IsSibcall)
// This is a sibcall. The memory operands are available in the caller's
// own caller's stack frame.
NumBytes = 0;
else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (isTailCall &&
shouldGuaranteeTCO(CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
// Set the delta of movement of the return address stack slot, but only
// if the delta is greater than the previous delta.
if (FPDiff < X86Info->getTCReturnAddrDelta())
X86Info->setTCReturnAddrDelta(FPDiff);
}
unsigned NumBytesToPush = NumBytes;
unsigned NumBytesToPop = NumBytes;
// If we have an inalloca argument, all stack space has already been
// allocated for us and is right at the top of the stack. We don't support
// multiple arguments passed in memory when using inalloca.
if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
NumBytesToPush = 0;
if (!ArgLocs.back().isMemLoc())
report_fatal_error("cannot use inalloca attribute on a register "
"parameter");
if (ArgLocs.back().getLocMemOffset() != 0)
report_fatal_error("any parameter with the inalloca attribute must be "
"the only memory argument");
} else if (CLI.IsPreallocated) {
assert(ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register "
"parameter");
SmallVector<size_t, 4> PreallocatedOffsets;
for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
}
}
auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
NumBytesToPush = 0;
}
if (!IsSibcall && !IsMustTail)
Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
NumBytes - NumBytesToPush, dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
// The next loop assumes that the locations are in the same order as the
// input arguments.
assert(isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering");
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
// Skip inalloca/preallocated arguments; they have already been written.
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
CCValAssign &VA = ArgLocs[I];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[OutIndex];
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
if (Arg.getValueType().isVector() &&
Arg.getValueType().getVectorElementType() == MVT::i1)
Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getBitcast(MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(RegVT, Arg);
break;
case CCValAssign::Indirect: {
if (isByVal) {
// Memcpy the argument to a temporary stack slot to prevent
// the caller from seeing any modifications the callee may make
// as guaranteed by the `byval` attribute.
int FrameIdx = MF.getFrameInfo().CreateStackObject(
Flags.getByValSize(),
std::max(Align(16), Flags.getNonZeroByValAlign()), false);
SDValue StackSlot =
DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
Chain =
CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
// From now on treat this as a regular pointer
Arg = StackSlot;
isByVal = false;
} else {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(
Chain, dl, Arg, SpillSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
Arg = SpillSlot;
}
break;
}
}
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
// Split v64i1 value into two registers
Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), I);
if (isVarArg && IsWin64) {
// The Win64 ABI requires an argument passed in an XMM reg to be copied to
// the corresponding shadow reg if the callee is a varargs function.
Register ShadowReg;
switch (VA.getLocReg()) {
case X86::XMM0: ShadowReg = X86::RCX; break;
case X86::XMM1: ShadowReg = X86::RDX; break;
case X86::XMM2: ShadowReg = X86::R8; break;
case X86::XMM3: ShadowReg = X86::R9; break;
}
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
}
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags, isByVal));
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
if (Subtarget.isPICStyleGOT()) {
// ELF / PIC requires the GOT pointer to be in the EBX register before
// function calls made via the PLT (except for regcall).
if (!isTailCall) {
// An indirect call with the RegCall calling convention may use up all the
// general-purpose registers, so it is not suitable to pin EBX to the GOT
// address; just let the register allocator handle it.
if (CallConv != CallingConv::X86_RegCall)
RegsToPass.push_back(std::make_pair(
Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()))));
} else {
// If we are tail calling and generating PIC/GOT style code, load the
// address of the callee into ECX. The value in ECX is used as the target of
// the tail jump. This is done to circumvent the ebx/callee-saved problem
// for tail calls on PIC/GOT architectures. Normally we would just put the
// address of GOT into ebx and then call target@PLT. But for tail calls
// ebx would be restored (since ebx is callee saved) before jumping to the
// target@PLT.
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasLocalLinkage() &&
G->getGlobal()->hasDefaultVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
}
}
if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
(Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an upper bound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
static const MCPhysReg XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
assert((Subtarget.hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
RegsToPass.push_back(std::make_pair(Register(X86::AL),
DAG.getConstant(NumXMMRegs, dl,
MVT::i8)));
}
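// For illustration: a prototype-less call like printf("%f\n", x) passing one
// double in XMM0 reaches here with NumXMMRegs = 1, so AL is set to 1 before
// the call; any upper bound up to 8 would equally satisfy the ABI.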
if (isVarArg && IsMustTail) {
const auto &Forwards = X86Info->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
RegsToPass.push_back(std::make_pair(F.PReg, Val));
}
}
// For tail calls lower the arguments to the 'real' stack slots. Sibcalls
// don't need this because the eligibility check rejects calls that require
// shuffling arguments passed in memory.
if (!IsSibcall && isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutsIndex) {
CCValAssign &VA = ArgLocs[I];
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
assert((CallConv == CallingConv::X86_RegCall) &&
"Expecting custom case only in regcall calling convention");
// This means that we are in a special case where one argument was
// passed through two register locations - skip the next location.
++I;
}
continue;
}
assert(VA.isMemLoc());
SDValue Arg = OutVals[OutsIndex];
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
// Skip inalloca/preallocated arguments. They don't require any work.
if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy(DAG.getDataLayout()));
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
MemOpChains2.push_back(DAG.getStore(
ArgChain, dl, Arg, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
}
}
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
getPointerTy(DAG.getDataLayout()),
RegInfo->getSlotSize(), FPDiff, dl);
}
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (Callee->getOpcode() == ISD::GlobalAddress ||
Callee->getOpcode() == ISD::ExternalSymbol) {
// Lower direct calls to global addresses and external symbols. Setting
// ForCall to true here has the effect of removing WrapperRIP when possible
// to allow direct calls to be selected without first materializing the
// address into a register.
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
} else if (Subtarget.isTarget64BitILP32() &&
Callee.getValueType() == MVT::i32) {
// Zero-extend the 32-bit Callee address to 64 bits, as required by the x32 ABI
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall && !IsMustTail) {
Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InFlag, dl);
InFlag = Chain.getValue(1);
}
Ops.push_back(Chain);
Ops.push_back(Callee);
if (isTailCall)
Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask = [&]() {
auto AdaptedCC = CallConv;
// If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
// use X86_INTR calling convention because it has the same CSR mask
// (same preserved registers).
if (HasNCSR)
AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
// If NoCalleeSavedRegisters is requested, then use GHC since it happens
// to use the CSR_NoRegs_RegMask.
if (CB && CB->hasFnAttr("no_callee_saved_registers"))
AdaptedCC = (CallingConv::ID)CallingConv::GHC;
return RegInfo->getCallPreservedMask(MF, AdaptedCC);
}();
assert(Mask && "Missing call preserved mask for calling convention");
// If this is an invoke in a 32-bit function using a funclet-based
// personality, assume the function clobbers all registers. If an exception
// is thrown, the runtime will not restore CSRs.
// FIXME: Model this more precisely so that we can register allocate across
// the normal edge and spill and fill across the exceptional edge.
if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
const Function &CallerFn = MF.getFunction();
EHPersonality Pers =
CallerFn.hasPersonalityFn()
? classifyEHPersonality(CallerFn.getPersonalityFn())
: EHPersonality::Unknown;
if (isFuncletEHPersonality(Pers))
Mask = RegInfo->getNoPreservedMask();
}
// Define a new register mask from the existing mask.
uint32_t *RegMask = nullptr;
// In some calling conventions we need to remove the used physical registers
// from the reg mask.
if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Allocate a new Reg Mask and copy Mask.
RegMask = MF.allocateRegMask();
unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
// Make sure all sub registers of the argument registers are reset
// in the RegMask.
for (auto const &RegPair : RegsToPass)
for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
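// The register mask is an array of 32-bit words with one bit per physical
// register; e.g. clearing register number 37 resets bit 5 (37 % 32) of
// word 1 (37 / 32).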
// Create the RegMask Operand according to our updated mask.
Ops.push_back(DAG.getRegisterMask(RegMask));
} else {
// Create the RegMask Operand according to the static mask.
Ops.push_back(DAG.getRegisterMask(Mask));
}
if (InFlag.getNode())
Ops.push_back(InFlag);
if (isTailCall) {
// We used to do:
//// If this is the first return lowered for this function, add the regs
//// to the liveout set for the function.
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
MF.getFrameInfo().setHasTailCall();
SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
if (IsCFICall)
Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
// Calls with a "clang.arc.attachedcall" bundle are special. They should be
// expanded to the call, directly followed by a special marker sequence and
// a call to an ObjC library function. Use CALL_RVMARKER to do that.
assert(!isTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
// Add a target global address for the retainRV/claimRV runtime function
// just before the call target.
Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
Ops.insert(Ops.begin() + 1, GA);
Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
} else {
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
}
if (IsCFICall)
Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
InFlag = Chain.getValue(1);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
// Save heapallocsite metadata.
if (CLI.CB)
if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
// If this call passes a struct-return pointer, the callee
// pops that struct pointer.
NumBytesForCalleeToPop = 4;
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
InFlag, dl);
InFlag = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
InVals, RegMask);
}
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
// Like stdcall, the callee cleans up the arguments; the convention differs in
// that ECX is reserved for storing the tail-called function's address. Only 2
// registers are free for argument passing (inreg). Tail call optimization is
// performed provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On X86_64 architecture with GOT-style position independent code only local
// (within module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld
// for example.)
// If a tail-called callee has more arguments than the caller, the caller needs
// to make sure that there is room to move the RETADDR to. This is achieved by
// reserving an area the size of the argument delta right after the original
// RETADDR, but before the saved frame pointer or the spilled registers,
// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
// stack layout:
// arg1
// arg2
// RETADDR
// [ new RETADDR
// move area ]
// (possible EBP)
// ESI
// EDI
// local1 ..
/// Align the stack size so that, once the return-address slot is accounted
/// for, it satisfies the alignment requirement; e.g. 16n + 12 for a 16-byte
/// alignment with a 4-byte slot size.
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
SelectionDAG &DAG) const {
const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
assert(StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize");
return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
}
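// A worked example for GetAlignedArgumentStackSize (hypothetical 32-bit
// values: SlotSize = 4, 16-byte stack alignment): StackSize = 20 yields
// alignTo(24, 16) - 4 = 28 = 16n + 12, so pushing the 4-byte return address
// restores 16-byte alignment.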
/// Return true if the given stack call argument is already available in the
/// same position (relatively) of the caller's incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const X86InstrInfo *TII, const CCValAssign &VA) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
for (;;) {
// Look through nodes that don't alter the bits of the incoming value.
unsigned Op = Arg.getOpcode();
if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
Arg = Arg.getOperand(0);
continue;
}
if (Op == ISD::TRUNCATE) {
const SDValue &TruncInput = Arg.getOperand(0);
if (TruncInput.getOpcode() == ISD::AssertZext &&
cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
Arg.getValueType()) {
Arg = TruncInput.getOperand(0);
continue;
}
}
break;
}
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!VR.isVirtual())
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
unsigned Opcode = Def->getOpcode();
if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
Opcode == X86::LEA64_32r) &&
Def->getOperand(1).isFI()) {
FI = Def->getOperand(1).getIndex();
Bytes = Flags.getByValSize();
} else
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
FI = FINode->getIndex();
Bytes = Flags.getByValSize();
} else
return false;
assert(FI != INT_MAX);
if (!MFI.isFixedObjectIndex(FI))
return false;
if (Offset != MFI.getObjectOffset(FI))
return false;
// If this is not byval, check that the argument stack object is immutable.
// inalloca and argument copy elision can create mutable argument stack
// objects. Byval objects can be mutated, but a byval call intends to pass the
// mutated memory.
if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
return false;
if (VA.getLocVT().getFixedSizeInBits() >
Arg.getValueSizeInBits().getFixedValue()) {
// If the argument location is wider than the argument type, check that any
// extension flags match.
if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
Flags.isSExt() != MFI.isObjectSExt(FI)) {
return false;
}
}
return Bytes == MFI.getObjectSize(FI);
}
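// For example (hypothetical IR), in
//   define void @f(i32 %a) { tail call void @g(i32 %a); ret void }
// the outgoing i32 is reloaded from the caller's fixed, immutable stack
// object for %a at the same offset and size, so MatchingStackOffset returns
// true and no store is needed for a sibcall.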
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
// If the function return type is x86_fp80 and the callee return type is not,
// then the FP_EXTEND of the call result is not a nop. It's not safe to
// perform a tailcall optimization here.
if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
return false;
CallingConv::ID CallerCC = CallerF.getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
// space.
if (IsCalleeWin64 != IsCallerWin64)
return false;
if (IsGuaranteeTCO) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
}
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (RegInfo->hasStackRealignment(MF))
return false;
// Also avoid sibcall optimization if we're an sret return fn and the callee
// is incompatible. See comment in LowerReturn about why hasStructRetAttr is
// insufficient.
if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
// For a compatible tail call the callee must return our sret pointer. So it
// needs to be (a) an sret function itself and (b) we pass our sret as its
// sret. Condition #b is harder to determine.
return false;
} else if (IsCalleePopSRet)
// The callee pops an sret, so we cannot tail-call, as our caller doesn't
// expect that.
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
// registers.
LLVMContext &C = *DAG.getContext();
if (isVarArg && !Outs.empty()) {
// Optimizing for varargs on Win64 is unlikely to be safe without
// additional testing.
if (IsCalleeWin64 || IsCallerWin64)
return false;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
if (!ArgLocs[i].isRegLoc())
return false;
}
// If the call result is in ST0 / ST1, it needs to be popped off the x87
// stack. Therefore, if it's not used by the call it is not safe to optimize
// this into a sibcall.
bool Unused = false;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (!Ins[i].Used) {
Unused = true;
break;
}
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
}
}
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
RetCC_X86, RetCC_X86))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
unsigned StackArgsSize = 0;
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
// Allocate shadow area for Win64
if (IsCalleeWin64)
CCInfo.AllocateStack(32, Align(8));
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
StackArgsSize = CCInfo.getNextStackOffset();
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII, VA))
return false;
}
}
}
bool PositionIndependent = isPositionIndependent();
// If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) ||
PositionIndependent)) {
unsigned NumInRegs = 0;
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs = PositionIndependent ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
continue;
Register Reg = VA.getLocReg();
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
if (++NumInRegs == MaxInRegs)
return false;
break;
}
}
}
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
}
bool CalleeWillPop =
X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt);
if (unsigned BytesToPop =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
// If we have bytes to pop, the callee must pop them.
bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
if (!CalleePopMatches)
return false;
} else if (CalleeWillPop && StackArgsSize > 0) {
// If we don't have bytes to pop, make sure the callee doesn't pop any.
return false;
}
return true;
}
FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return X86::createFastISel(funcInfo, libInfo);
}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
bool AssumeSingleUse) {
if (!AssumeSingleUse && !Op.hasOneUse())
return false;
if (!ISD::isNormalLoad(Op.getNode()))
return false;
// If this is an unaligned vector, make sure the target supports folding it.
auto *Ld = cast<LoadSDNode>(Op.getNode());
if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
return false;
// TODO: If this is a non-temporal load and the target has an instruction
// for it, it should not be folded. See "useNonTemporalLoad()".
return true;
}
bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
const X86Subtarget &Subtarget,
bool AssumeSingleUse) {
assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
return false;
// We cannot replace a wide volatile load with a broadcast-from-memory,
// because that would narrow the load, which isn't legal for volatiles.
auto *Ld = cast<LoadSDNode>(Op.getNode());
return !Ld->isVolatile() ||
Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
}
bool X86::mayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}
bool X86::mayFoldIntoZeroExtend(SDValue Op) {
if (Op.hasOneUse()) {
unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
return (ISD::ZERO_EXTEND == Opcode);
}
return false;
}
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
case X86ISD::INSERTPS:
case X86ISD::EXTRQI:
case X86ISD::INSERTQI:
case X86ISD::VALIGN:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
case X86ISD::MOVLHPS:
case X86ISD::MOVHLPS:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::MOVSH:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VBROADCAST:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
case X86ISD::SHUF128:
case X86ISD::VPERMIL2:
case X86ISD::VPERMI:
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VZEXT_MOVL:
return true;
}
}
static bool isTargetShuffleVariableMask(unsigned Opcode) {
switch (Opcode) {
default: return false;
// Target Shuffles.
case X86ISD::PSHUFB:
case X86ISD::VPERMILPV:
case X86ISD::VPERMIL2:
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
return true;
// 'Faux' Target Shuffles.
case ISD::OR:
case ISD::AND:
case X86ISD::ANDNP:
return true;
}
}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
-(int64_t)SlotSize,
false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// The offset should fit into a 32-bit immediate field.
if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
// restrictions.
if (!hasSymbolicDisplacement)
return true;
// FIXME: Some tweaks might be needed for medium code model.
if (M != CodeModel::Small && M != CodeModel::Kernel)
return false;
// For the small code model we assume that the last object is placed at least
// 16MB before the end of the 31-bit address range, so positive offsets below
// 16MB are safe. We may also accept pretty large negative constants, knowing
// that all objects are in the positive half of the address space.
if (M == CodeModel::Small && Offset < 16*1024*1024)
return true;
// For the kernel code model we know that all objects reside in the negative
// half of the 32-bit address space. We must not accept negative offsets,
// since they may be just out of range, while we may accept pretty large
// positive ones.
if (M == CodeModel::Kernel && Offset >= 0)
return true;
return false;
}
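// Examples: with a symbolic displacement under the small code model, an
// offset of 10*1024*1024 (10MB) is accepted while 20*1024*1024 is rejected;
// under the kernel code model, an offset of 0 is accepted and -8 is rejected.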
/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
// If GuaranteeTCO is true, we force some calls to be callee pop so that we
// can guarantee TCO.
if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
return true;
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::X86_VectorCall:
return !is64Bit;
}
}
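// For instance, a 32-bit stdcall function taking 12 bytes of arguments
// returns with 'ret 12' (callee pop), while the same signature under the
// default C convention uses a plain 'ret' and the caller adjusts ESP.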
/// Return true if the condition is a signed comparison operation.
static bool isX86CCSigned(unsigned X86CC) {
switch (X86CC) {
default:
llvm_unreachable("Invalid integer condition!");
case X86::COND_E:
case X86::COND_NE:
case X86::COND_B:
case X86::COND_A:
case X86::COND_BE:
case X86::COND_AE:
return false;
case X86::COND_G:
case X86::COND_GE:
case X86::COND_L:
case X86::COND_LE:
return true;
}
}
static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
switch (SetCCOpcode) {
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;
case ISD::SETLT: return X86::COND_L;
case ISD::SETLE: return X86::COND_LE;
case ISD::SETNE: return X86::COND_NE;
case ISD::SETULT: return X86::COND_B;
case ISD::SETUGT: return X86::COND_A;
case ISD::SETULE: return X86::COND_BE;
case ISD::SETUGE: return X86::COND_AE;
}
}
/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
bool isFP, SDValue &LHS, SDValue &RHS,
SelectionDAG &DAG) {
if (!isFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_NS;
}
if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
// X >= 0 -> X == 0, jump on !sign.
return X86::COND_NS;
}
if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_LE;
}
}
return TranslateIntegerX86CC(SetCCOpcode);
}
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
if (ISD::isNON_EXTLoad(LHS.getNode()) &&
!ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
switch (SetCCOpcode) {
default: break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
case ISD::SETUGE:
std::swap(LHS, RHS);
break;
}
// On a floating point condition, the flags are set as follows:
// ZF PF CF op
// 0 | 0 | 0 | X > Y
// 0 | 0 | 1 | X < Y
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
switch (SetCCOpcode) {
default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
case ISD::SETOLT: // flipped
case ISD::SETOGT:
case ISD::SETGT: return X86::COND_A;
case ISD::SETOLE: // flipped
case ISD::SETOGE:
case ISD::SETGE: return X86::COND_AE;
case ISD::SETUGT: // flipped
case ISD::SETULT:
case ISD::SETLT: return X86::COND_B;
case ISD::SETUGE: // flipped
case ISD::SETULE:
case ISD::SETLE: return X86::COND_BE;
case ISD::SETONE:
case ISD::SETNE: return X86::COND_NE;
case ISD::SETUO: return X86::COND_P;
case ISD::SETO: return X86::COND_NP;
case ISD::SETOEQ:
case ISD::SETUNE: return X86::COND_INVALID;
}
}
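// Example trace (hypothetical operands): for 'setolt x, y' the operands are
// swapped above and the node maps to COND_A, so 'x < y' is tested as
// 'y > x' via the CF/ZF settings in the flag table above.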
/// Is there a floating point cmov for the specific X86 condition code?
/// The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
switch (X86CC) {
default:
return false;
case X86::COND_B:
case X86::COND_BE:
case X86::COND_E:
case X86::COND_P:
case X86::COND_A:
case X86::COND_AE:
case X86::COND_NE:
case X86::COND_NP:
return true;
}
}
static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
VT.is512BitVector();
}
bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
Info.flags = MachineMemOperand::MONone;
Info.offset = 0;
const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
if (!IntrData) {
switch (Intrinsic) {
case Intrinsic::x86_aesenc128kl:
case Intrinsic::x86_aesdec128kl:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(1);
Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
return true;
case Intrinsic::x86_aesenc256kl:
case Intrinsic::x86_aesdec256kl:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(1);
Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
return true;
case Intrinsic::x86_aesencwide128kl:
case Intrinsic::x86_aesdecwide128kl:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
return true;
case Intrinsic::x86_aesencwide256kl:
case Intrinsic::x86_aesdecwide256kl:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
return true;
case Intrinsic::x86_cmpccxadd32:
case Intrinsic::x86_cmpccxadd64:
case Intrinsic::x86_atomic_bts:
case Intrinsic::x86_atomic_btc:
case Intrinsic::x86_atomic_btr: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
unsigned Size = I.getType()->getScalarSizeInBits();
Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
Info.align = Align(Size);
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::x86_atomic_bts_rm:
case Intrinsic::x86_atomic_btc_rm:
case Intrinsic::x86_atomic_btr_rm: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
Info.align = Align(Size);
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::x86_aadd32:
case Intrinsic::x86_aadd64:
case Intrinsic::x86_aand32:
case Intrinsic::x86_aand64:
case Intrinsic::x86_aor32:
case Intrinsic::x86_aor64:
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64:
case Intrinsic::x86_atomic_add_cc:
case Intrinsic::x86_atomic_sub_cc:
case Intrinsic::x86_atomic_or_cc:
case Intrinsic::x86_atomic_and_cc:
case Intrinsic::x86_atomic_xor_cc: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
Info.align = Align(Size);
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
}
return false;
}
switch (IntrData->Type) {
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
Info.opc = ISD::INTRINSIC_VOID;
Info.ptrVal = I.getArgOperand(0);
MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
ScalarVT = MVT::i8;
else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
ScalarVT = MVT::i16;
else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
ScalarVT = MVT::i32;
Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOStore;
break;
}
case GATHER:
case GATHER_AVX2: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = nullptr;
MVT DataVT = MVT::getVT(I.getType());
MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
unsigned NumElts = std::min(DataVT.getVectorNumElements(),
IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
break;
}
case SCATTER: {
Info.opc = ISD::INTRINSIC_VOID;
Info.ptrVal = nullptr;
MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
unsigned NumElts = std::min(DataVT.getVectorNumElements(),
IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOStore;
break;
}
default:
return false;
}
return true;
}
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
for (const APFloat &FPImm : LegalFPImmediates)
if (Imm.bitwiseIsEqual(FPImm))
return true;
return false;
}
bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
// "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
// relocation target a movq or addq instruction: don't let the load shrink.
SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
// If this is (1) an AVX vector load with (2) multiple uses and (3) all of
// those uses are extracted directly into a store, then the extract + store
// can be store-folded. Therefore, it's probably not worth splitting the load.
EVT VT = Load->getValueType(0);
if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
// Skip uses of the chain value. Result 0 of the node is the load value.
if (UI.getUse().getResNo() != 0)
continue;
// If this use is not an extract + store, it's probably worth splitting.
if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
UI->use_begin()->getOpcode() != ISD::STORE)
return true;
}
// All non-chain uses are extract + store.
return false;
}
return true;
}
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0 || BitSize > 64)
return false;
return true;
}
bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
// If we are using XMM registers in the ABI and the condition of the select is
// a floating-point compare and we have blendv or conditional move, then it is
// cheaper to select instead of doing a cross-register move and creating a
// load that depends on the compare result.
bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
}
bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
// TODO: It might be a win to ease or lift this restriction, but the generic
// folds in DAGCombiner conflict with vector folds for an AVX512 target.
if (VT.isVector() && Subtarget.hasAVX512())
return false;
return true;
}
bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const {
// TODO: We handle scalars using custom code, but generic combining could make
// that unnecessary.
APInt MulC;
if (!ISD::isConstantSplatVector(C.getNode(), MulC))
return false;
// Find the type this will be legalized to. Otherwise we might prematurely
// convert this to shl+add/sub and then still have to type legalize those ops.
// Another choice would be to defer the decision for illegal types until
// after type legalization. But constant splat vectors of i64 can't make it
// through type legalization on 32-bit targets so we would need to special
// case vXi64.
while (getTypeAction(Context, VT) != TypeLegal)
VT = getTypeToTransformTo(Context, VT);
// If vector multiply is legal, assume that's faster than shl + add/sub.
// Multiply is a complex op with higher latency and lower throughput in
// most implementations; we keep it only when sub-vXi32 vector multiplies
// (which are always fast) are used, or for vXi32 when PMULLD is not slow,
// since anything larger (vXi64) is always going to be slow.
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
(EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
return false;
// shl+add, shl+sub, shl+add+neg
return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
(1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
}
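// Examples of accepted splat constants: MulC = 5 or 9 decomposes to shl+add
// (x*5 = (x << 2) + x), MulC = 3 or 7 to shl+sub (x*7 = (x << 3) - x), and a
// negative form such as MulC = -9 to shl+add+neg.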
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
// Mask vectors support all subregister combinations and operations that
// extract half of a vector.
if (ResVT.getVectorElementType() == MVT::i1)
return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
(Index == ResVT.getVectorNumElements()));
return (Index % ResVT.getVectorNumElements()) == 0;
}
bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
unsigned Opc = VecOp.getOpcode();
// Assume target opcodes can't be scalarized.
// TODO - do we have any exceptions?
if (Opc >= ISD::BUILTIN_OP_END)
return false;
// If the vector op is not supported, try to convert to scalar.
EVT VecVT = VecOp.getValueType();
if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
return true;
// If the vector op is supported, but the scalar op is not, the transform may
// not be worthwhile.
EVT ScalarVT = VecVT.getScalarType();
return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}
bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool) const {
// TODO: Allow vectors?
if (VT.isVector())
return false;
return VT.isSimple() || !isOperationExpand(Opcode, VT);
}
bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
// Speculate cttz only if we can directly use TZCNT or can promote to i32.
return Subtarget.hasBMI() ||
(!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
}
bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
// Speculate ctlz only if we can directly use LZCNT.
return Subtarget.hasLZCNT();
}
bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
}
bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
// Don't shrink FP constant-pool loads if SSE2 is available, since cvtss2sd is
// more expensive than a straight movsd. On the other hand, it's important to
// shrink long double FP constants since fldt is very slow.
return !Subtarget.hasSSE2() || VT == MVT::f80;
}
bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
(VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
}
bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
const SelectionDAG &DAG,
const MachineMemOperand &MMO) const {
if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
BitcastVT.getVectorElementType() == MVT::i1)
return false;
if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
return false;
// If both types are legal vectors, it's always ok to convert them.
if (LoadVT.isVector() && BitcastVT.isVector() &&
isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
return true;
return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
}
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const MachineFunction &MF) const {
// Do not merge up to float value sizes (128 bits and above) if the
// no-implicit-float attribute is set.
bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
if (NoFloat) {
unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
return (MemVT.getSizeInBits() <= MaxIntSize);
}
// Make sure we don't merge greater than our preferred vector
// width.
if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
return false;
return true;
}
bool X86TargetLowering::isCtlzFast() const {
return Subtarget.hasFastLZCNT();
}
bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
return true;
}
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
EVT VT = Y.getValueType();
if (VT.isVector())
return false;
if (!Subtarget.hasBMI())
return false;
// There are only 32-bit and 64-bit forms for 'andn'.
if (VT != MVT::i32 && VT != MVT::i64)
return false;
return !isa<ConstantSDNode>(Y);
}
bool X86TargetLowering::hasAndNot(SDValue Y) const {
EVT VT = Y.getValueType();
if (!VT.isVector())
return hasAndNotCompare(Y);
// Vector.
if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
return false;
if (VT == MVT::v4i32)
return true;
return Subtarget.hasSSE2();
}
bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
return X.getValueType().isScalarInteger(); // 'bt'
}
bool X86TargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const {
// Does baseline recommend not to perform the fold by default?
if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
return false;
// For scalars this transform is always beneficial.
if (X.getValueType().isScalarInteger())
return true;
// If all the shift amounts are identical, then transform is beneficial even
// with rudimentary SSE2 shifts.
if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
return true;
// If we have AVX2 with its powerful shift operations, then it's also good.
if (Subtarget.hasAVX2())
return true;
// Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
return NewShiftOpcode == ISD::SHL;
}
bool X86TargetLowering::preferScalarizeSplat(unsigned Opc) const {
return Opc != ISD::FP_EXTEND;
}
bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) ||
(N->getOpcode() == ISD::SRL &&
N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask");
// TODO: Should we always create i64 masks? Or only folded immediates?
EVT VT = N->getValueType(0);
if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
(Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
// Only fold if the shift values are equal - so it folds to AND.
// TODO - we should fold if either is a non-uniform vector but we don't do
// the fold for non-splats yet.
return N->getOperand(1) == N->getOperand(0).getOperand(1);
}
return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
}
bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
EVT VT = Y.getValueType();
// For vectors, we don't have a preference, but we probably want a mask.
if (VT.isVector())
return false;
// 64-bit shifts on 32-bit targets produce really bad bloated code.
if (VT == MVT::i64 && !Subtarget.is64Bit())
return false;
return true;
}
TargetLowering::ShiftLegalizationStrategy
X86TargetLowering::preferredShiftLegalizationStrategy(
SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
!Subtarget.isOSWindows())
return ShiftLegalizationStrategy::LowerToLibcall;
return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
ExpansionFactor);
}
bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
// Any legal vector type can be splatted more efficiently than
// loading/spilling from memory.
return isTypeLegal(VT);
}
MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
MVT VT = MVT::getIntegerVT(NumBits);
if (isTypeLegal(VT))
return VT;
// PMOVMSKB can handle this.
if (NumBits == 128 && isTypeLegal(MVT::v16i8))
return MVT::v16i8;
// VPMOVMSKB can handle this.
if (NumBits == 256 && isTypeLegal(MVT::v32i8))
return MVT::v32i8;
// TODO: Allow 64-bit type for 32-bit target.
// TODO: 512-bit types should be allowed, but make sure that those
// cases are handled in combineVectorSizedSetCCEquality().
return MVT::INVALID_SIMPLE_VALUE_TYPE;
}
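// For example, expanding memcmp(a, b, 16) == 0 on a target where v16i8 is
// legal compares the two vectors and tests the PMOVMSKB result instead of
// chaining scalar compares.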
/// Return true if Val is the undef sentinel value or equal to the specified
/// value.
static bool isUndefOrEqual(int Val, int CmpVal) {
return ((Val == SM_SentinelUndef) || (Val == CmpVal));
}
/// Return true if every element in Mask is the undef sentinel value or equal to
/// the specified value.
static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
return llvm::all_of(Mask, [CmpVal](int M) {
return (M == SM_SentinelUndef) || (M == CmpVal);
});
}
/// Return true if Val is either the undef or zero sentinel value.
static bool isUndefOrZero(int Val) {
return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
}
/// Return true if every element in Mask, beginning from position Pos and ending
/// in Pos+Size is the undef sentinel value.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
return llvm::all_of(Mask.slice(Pos, Size),
[](int M) { return M == SM_SentinelUndef; });
}
/// Return true if the mask creates a vector whose lower half is undefined.
static bool isUndefLowerHalf(ArrayRef<int> Mask) {
unsigned NumElts = Mask.size();
return isUndefInRange(Mask, 0, NumElts / 2);
}
/// Return true if the mask creates a vector whose upper half is undefined.
static bool isUndefUpperHalf(ArrayRef<int> Mask) {
unsigned NumElts = Mask.size();
return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
}
/// Return true if Val falls within the specified range [Low, Hi).
static bool isInRange(int Val, int Low, int Hi) {
return (Val >= Low && Val < Hi);
}
/// Return true if the value of any element in Mask falls within the specified
/// range [Low, Hi).
static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
}
/// Return true if the value of any element in Mask is the zero sentinel value.
static bool isAnyZero(ArrayRef<int> Mask) {
return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
}
/// Return true if the value of any element in Mask is the zero or undef
/// sentinel value.
static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
return llvm::any_of(Mask, [](int M) {
return M == SM_SentinelZero || M == SM_SentinelUndef;
});
}
/// Return true if Val is undef or if its value falls within the
/// specified range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
}
/// Return true if every element in Mask is undef or if its value
/// falls within the specified range [Low, Hi).
static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
return llvm::all_of(
Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
}
/// Return true if Val is undef, zero or if its value falls within the
/// specified range [Low, Hi).
static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
}
/// Return true if every element in Mask is undef, zero or if its value
/// falls within the specified range [Low, Hi).
static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
return llvm::all_of(
Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos + Size, falls within the specified
/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size, int Low, int Step = 1) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
if (!isUndefOrEqual(Mask[i], Low))
return false;
return true;
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, matches the specified sequence
/// (Low, Low + Step, ..., Low + (Size - 1) * Step), or is undef or zero.
static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size, int Low,
int Step = 1) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
return false;
return true;
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size is undef or is zero.
static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size) {
return llvm::all_of(Mask.slice(Pos, Size), isUndefOrZero);
}
/// Helper function to test whether a shuffle mask could be
/// simplified by widening the elements being shuffled.
///
/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
/// leaves it in an unspecified state.
///
/// NOTE: This must handle normal vector shuffle masks and *target* vector
/// shuffle masks. The latter have the special property of a '-2' representing
/// a zero-ed lane of a vector.
static bool canWidenShuffleElements(ArrayRef<int> Mask,
SmallVectorImpl<int> &WidenedMask) {
WidenedMask.assign(Mask.size() / 2, 0);
for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
int M0 = Mask[i];
int M1 = Mask[i + 1];
// If both elements are undef, it's trivial.
if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
WidenedMask[i / 2] = SM_SentinelUndef;
continue;
}
// Check for an undef mask and a mask value properly aligned to fit with
// a pair of values. If we find such a case, use the non-undef mask's value.
if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
WidenedMask[i / 2] = M1 / 2;
continue;
}
if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
WidenedMask[i / 2] = M0 / 2;
continue;
}
// When zeroing, we need to spread the zeroing across both lanes to widen.
if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
(M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
WidenedMask[i / 2] = SM_SentinelZero;
continue;
}
return false;
}
// Finally check if the two mask values are adjacent and aligned with
// a pair.
if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
WidenedMask[i / 2] = M0 / 2;
continue;
}
// Otherwise we can't safely widen the elements used in this shuffle.
return false;
}
assert(WidenedMask.size() == Mask.size() / 2 &&
"Incorrect size of mask after widening the elements!");
return true;
}
static bool canWidenShuffleElements(ArrayRef<int> Mask,
const APInt &Zeroable,
bool V2IsZero,
SmallVectorImpl<int> &WidenedMask) {
// Create an alternative mask with info about zeroable elements.
// Here we do not set undef elements as zeroable.
SmallVector<int, 64> ZeroableMask(Mask);
if (V2IsZero) {
assert(!Zeroable.isZero() && "V2's non-undef elements are used?!");
for (int i = 0, Size = Mask.size(); i != Size; ++i)
if (Mask[i] != SM_SentinelUndef && Zeroable[i])
ZeroableMask[i] = SM_SentinelZero;
}
return canWidenShuffleElements(ZeroableMask, WidenedMask);
}
static bool canWidenShuffleElements(ArrayRef<int> Mask) {
SmallVector<int, 32> WidenedMask;
return canWidenShuffleElements(Mask, WidenedMask);
}
// Attempt to narrow/widen shuffle mask until it matches the target number of
// elements.
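// For example, the 4-element mask <0, 1, 6, 7> narrows to
// <0, 1, 2, 3, 12, 13, 14, 15> for NumDstElts == 8 and widens to <0, 3> for
// NumDstElts == 2.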
static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
SmallVectorImpl<int> &ScaledMask) {
unsigned NumSrcElts = Mask.size();
assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
"Illegal shuffle scale factor");
// Narrowing is guaranteed to work.
if (NumDstElts >= NumSrcElts) {
int Scale = NumDstElts / NumSrcElts;
llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
return true;
}
// We have to repeat the widening until we reach the target size, but we can
// split out the first widening as it sets up ScaledMask for us.
if (canWidenShuffleElements(Mask, ScaledMask)) {
while (ScaledMask.size() > NumDstElts) {
SmallVector<int, 16> WidenedMask;
if (!canWidenShuffleElements(ScaledMask, WidenedMask))
return false;
ScaledMask = std::move(WidenedMask);
}
return true;
}
return false;
}
/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
return isNullConstant(Elt) || isNullFPConstant(Elt);
}
// Build a vector of constants.
// Use an UNDEF node if MaskElt == -1.
// Split 64-bit constants into 32-bit halves in 32-bit mode.
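// For example, in 32-bit mode a v2i64 mask <1, -1> is built as the v4i32
// vector <1, 0, undef, undef> and then bitcast back to v2i64 (IsMask case).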
static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
const SDLoc &dl, bool IsMask = false) {
SmallVector<SDValue, 32> Ops;
bool Split = false;
MVT ConstVecVT = VT;
unsigned NumElts = VT.getVectorNumElements();
bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
Split = true;
}
MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0; i < NumElts; ++i) {
bool IsUndef = Values[i] < 0 && IsMask;
SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
DAG.getConstant(Values[i], dl, EltVT);
Ops.push_back(OpNode);
if (Split)
Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
DAG.getConstant(0, dl, EltVT));
}
SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
if (Split)
ConstsNode = DAG.getBitcast(VT, ConstsNode);
return ConstsNode;
}
static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert(Bits.size() == Undefs.getBitWidth() &&
"Unequal constant and undef arrays");
SmallVector<SDValue, 32> Ops;
bool Split = false;
MVT ConstVecVT = VT;
unsigned NumElts = VT.getVectorNumElements();
bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
Split = true;
}
MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
if (Undefs[i]) {
Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
continue;
}
const APInt &V = Bits[i];
assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
if (Split) {
Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
} else if (EltVT == MVT::f32) {
APFloat FV(APFloat::IEEEsingle(), V);
Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
} else if (EltVT == MVT::f64) {
APFloat FV(APFloat::IEEEdouble(), V);
Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
} else {
Ops.push_back(DAG.getConstant(V, dl, EltVT));
}
}
SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
return DAG.getBitcast(VT, ConstsNode);
}
/// Returns a vector of specified type with all zero elements.
static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
VT.getVectorElementType() == MVT::i1) &&
"Unexpected vector type");
// Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
// type. This ensures they get CSE'd. But if the integer type is not
// available, use a floating-point +0.0 instead.
SDValue Vec;
if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
} else if (VT.isFloatingPoint()) {
Vec = DAG.getConstantFP(+0.0, dl, VT);
} else if (VT.getVectorElementType() == MVT::i1) {
assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type");
Vec = DAG.getConstant(0, dl, VT);
} else {
unsigned Num32BitElts = VT.getSizeInBits() / 32;
Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
}
return DAG.getBitcast(VT, Vec);
}
// Helper to determine if LHS and RHS are both subvectors extracted from a
// single source. If we allow commuting, they don't have to be in (Lo, Hi) order.
static SDValue getSplitVectorSrc(SDValue LHS, SDValue RHS, bool AllowCommute) {
if (LHS.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
RHS.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
LHS.getValueType() != RHS.getValueType() ||
LHS.getOperand(0) != RHS.getOperand(0))
return SDValue();
SDValue Src = LHS.getOperand(0);
if (Src.getValueSizeInBits() != (LHS.getValueSizeInBits() * 2))
return SDValue();
unsigned NumElts = LHS.getValueType().getVectorNumElements();
if ((LHS.getConstantOperandAPInt(1) == 0 &&
RHS.getConstantOperandAPInt(1) == NumElts) ||
(AllowCommute && RHS.getConstantOperandAPInt(1) == 0 &&
LHS.getConstantOperandAPInt(1) == NumElts))
return Src;
return SDValue();
}
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
const SDLoc &dl, unsigned vectorWidth) {
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
unsigned Factor = VT.getSizeInBits() / vectorWidth;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
VT.getVectorNumElements() / Factor);
// Extract the relevant vectorWidth bits by generating an EXTRACT_SUBVECTOR.
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
// we want. Since ElemsPerChunk is a power of 2, we just need to clear the
// low bits.
IdxVal &= ~(ElemsPerChunk - 1);
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getBuildVector(ResultVT, dl,
Vec->ops().slice(IdxVal, ElemsPerChunk));
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
/// instructions or a simple subregister reference. Idx is an index in the
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
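/// For example, IdxVal == 5 on a v8f32 source rounds down to element 4 and
/// extracts the upper <4 x float> half.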
static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert((Vec.getValueType().is256BitVector() ||
Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
return extractSubVector(Vec, IdxVal, DAG, dl, 128);
}
/// Generate a DAG to grab 256-bits from a 512-bit vector.
static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
return extractSubVector(Vec, IdxVal, DAG, dl, 256);
}
static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl,
unsigned vectorWidth) {
assert((vectorWidth == 128 || vectorWidth == 256) &&
"Unsupported vector width");
// Inserting UNDEF is a no-op; just return Result.
if (Vec.isUndef())
return Result;
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
EVT ResultVT = Result.getValueType();
// Insert the relevant vectorWidth bits.
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
// we want. Since ElemsPerChunk is a power of 2, we just need to clear the
// low bits.
IdxVal &= ~(ElemsPerChunk - 1);
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
/// simple superregister reference. Idx is an index in the 128 bits
/// we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}
/// Widen a vector to a larger size with the same scalar type, with the new
/// elements either zero or undef.
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl) {
assert(Vec.getValueSizeInBits().getFixedValue() < VT.getFixedSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type");
SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
: DAG.getUNDEF(VT);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
DAG.getIntPtrConstant(0, dl));
}
/// Widen a vector to a larger size with the same scalar type, with the new
/// elements either zero or undef.
static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl, unsigned WideSizeInBits) {
assert(Vec.getValueSizeInBits() < WideSizeInBits &&
(WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&
"Unsupported vector widening type");
unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
MVT SVT = Vec.getSimpleValueType().getScalarType();
MVT VT = MVT::getVectorVT(SVT, WideNumElts);
return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
}
// Helper function to collect subvector ops that are concatenated together,
// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
// The subvectors in Ops are guaranteed to be the same type.
static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops,
SelectionDAG &DAG) {
assert(Ops.empty() && "Expected an empty ops vector");
if (N->getOpcode() == ISD::CONCAT_VECTORS) {
Ops.append(N->op_begin(), N->op_end());
return true;
}
if (N->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue Src = N->getOperand(0);
SDValue Sub = N->getOperand(1);
const APInt &Idx = N->getConstantOperandAPInt(2);
EVT VT = Src.getValueType();
EVT SubVT = Sub.getValueType();
// TODO - Handle more general insert_subvector chains.
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2)) {
// insert_subvector(undef, x, lo)
if (Idx == 0 && Src.isUndef()) {
Ops.push_back(Sub);
Ops.push_back(DAG.getUNDEF(SubVT));
return true;
}
if (Idx == (VT.getVectorNumElements() / 2)) {
// insert_subvector(insert_subvector(undef, x, lo), y, hi)
if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
Src.getOperand(1).getValueType() == SubVT &&
isNullConstant(Src.getOperand(2))) {
Ops.push_back(Src.getOperand(1));
Ops.push_back(Sub);
return true;
}
// insert_subvector(x, extract_subvector(x, lo), hi)
if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
Ops.append(2, Sub);
return true;
}
// insert_subvector(undef, x, hi)
if (Src.isUndef()) {
Ops.push_back(DAG.getUNDEF(SubVT));
Ops.push_back(Sub);
return true;
}
}
}
}
return false;
}
static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) {
EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
unsigned SizeInBits = VT.getSizeInBits();
assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
"Can't split odd sized vector");
// If this is a splat value (with no undefs) then use the lower subvector,
// which should be a free extraction.
SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2);
if (DAG.isSplatValue(Op, /*AllowUndefs*/ false))
return std::make_pair(Lo, Lo);
SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2);
return std::make_pair(Lo, Hi);
}
/// Break an operation into 2 half sized ops and then concatenate the results.
static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG) {
unsigned NumOps = Op.getNumOperands();
EVT VT = Op.getValueType();
SDLoc dl(Op);
// Extract the LHS Lo/Hi vectors
SmallVector<SDValue> LoOps(NumOps, SDValue());
SmallVector<SDValue> HiOps(NumOps, SDValue());
for (unsigned I = 0; I != NumOps; ++I) {
SDValue SrcOp = Op.getOperand(I);
if (!SrcOp.getValueType().isVector()) {
LoOps[I] = HiOps[I] = SrcOp;
continue;
}
std::tie(LoOps[I], HiOps[I]) = splitVector(SrcOp, DAG, dl);
}
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, LoVT, LoOps),
DAG.getNode(Op.getOpcode(), dl, HiVT, HiOps));
}
/// Break an unary integer operation into 2 half sized ops and then
/// concatenate the result back.
static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
// Make sure we only try to split 256/512-bit types to avoid creating
// narrow vectors.
EVT VT = Op.getValueType();
(void)VT;
assert((Op.getOperand(0).getValueType().is256BitVector() ||
Op.getOperand(0).getValueType().is512BitVector()) &&
(VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
VT.getVectorNumElements() &&
"Unexpected VTs!");
return splitVectorOp(Op, DAG);
}
/// Break a binary integer operation into 2 half sized ops and then
/// concatenate the result back.
static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
// Assert that all the types match.
EVT VT = Op.getValueType();
(void)VT;
assert(Op.getOperand(0).getValueType() == VT &&
Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");
assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
return splitVectorOp(Op, DAG);
}
// Helper for splitting operands of an operation to legal target size and
// apply a function on each part.
// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
// The argument Builder is a function that will be applied on each split part:
// SDValue Builder(SelectionDAG &G, const SDLoc &DL, ArrayRef<SDValue> Ops)
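// Illustrative usage (a minimal sketch, not taken from a caller below):
// split a wide X86ISD::VPMADDWD into legal-width pieces.
//   SDValue R = SplitOpsAndApply(
//       DAG, Subtarget, DL, VT, {LHS, RHS},
//       [](SelectionDAG &G, const SDLoc &DL, ArrayRef<SDValue> Ops) {
//         MVT OpVT = Ops[0].getSimpleValueType();
//         MVT ResVT =
//             MVT::getVectorVT(MVT::i32, OpVT.getVectorNumElements() / 2);
//         return G.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops);
//       });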
template <typename F>
SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
F Builder, bool CheckBWI = true) {
assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
unsigned NumSubs = 1;
if ((CheckBWI && Subtarget.useBWIRegs()) ||
(!CheckBWI && Subtarget.useAVX512Regs())) {
if (VT.getSizeInBits() > 512) {
NumSubs = VT.getSizeInBits() / 512;
assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
}
} else if (Subtarget.hasAVX2()) {
if (VT.getSizeInBits() > 256) {
NumSubs = VT.getSizeInBits() / 256;
assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
}
} else {
if (VT.getSizeInBits() > 128) {
NumSubs = VT.getSizeInBits() / 128;
assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
}
}
if (NumSubs == 1)
return Builder(DAG, DL, Ops);
SmallVector<SDValue, 4> Subs;
for (unsigned i = 0; i != NumSubs; ++i) {
SmallVector<SDValue, 2> SubOps;
for (SDValue Op : Ops) {
EVT OpVT = Op.getValueType();
unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
}
Subs.push_back(Builder(DAG, DL, SubOps));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
}
// Helper function that extends a non-512-bit vector op to 512-bits on non-VLX
// targets.
static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT,
ArrayRef<SDValue> Ops, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(Subtarget.hasAVX512() && "AVX512 target expected");
MVT SVT = VT.getScalarType();
// If we have a splatted 32/64-bit constant, splat it to DstTy to
// encourage a foldable broadcast'd operand.
auto MakeBroadcastOp = [&](SDValue Op, MVT OpVT, MVT DstVT) {
unsigned OpEltSizeInBits = OpVT.getScalarSizeInBits();
// AVX512 broadcasts 32/64-bit operands.
// TODO: Support float once getAVX512Node is used by fp-ops.
if (!OpVT.isInteger() || OpEltSizeInBits < 32 ||
!DAG.getTargetLoweringInfo().isTypeLegal(SVT))
return SDValue();
// If we're not widening, there's nothing to do unless the operand is a bitcast.
if (OpVT == DstVT && Op.getOpcode() != ISD::BITCAST)
return SDValue();
if (auto *BV = dyn_cast<BuildVectorSDNode>(peekThroughBitcasts(Op))) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, OpEltSizeInBits) &&
!HasAnyUndefs && SplatValue.getBitWidth() == OpEltSizeInBits)
return DAG.getConstant(SplatValue, DL, DstVT);
}
return SDValue();
};
bool Widen = !(Subtarget.hasVLX() || VT.is512BitVector());
MVT DstVT = VT;
if (Widen)
DstVT = MVT::getVectorVT(SVT, 512 / SVT.getSizeInBits());
// Canonicalize src operands.
SmallVector<SDValue> SrcOps(Ops.begin(), Ops.end());
for (SDValue &Op : SrcOps) {
MVT OpVT = Op.getSimpleValueType();
// Just pass through scalar operands.
if (!OpVT.isVector())
continue;
assert(OpVT == VT && "Vector type mismatch");
if (SDValue BroadcastOp = MakeBroadcastOp(Op, OpVT, DstVT)) {
Op = BroadcastOp;
continue;
}
// Just widen the subvector by inserting into an undef wide vector.
if (Widen)
Op = widenSubVector(Op, false, Subtarget, DAG, DL, 512);
}
SDValue Res = DAG.getNode(Opcode, DL, DstVT, SrcOps);
// Perform the 512-bit op then extract the bottom subvector.
if (Widen)
Res = extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
return Res;
}
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue SubVec = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
unsigned IdxVal = Op.getConstantOperandVal(2);
// Inserting undef is a nop. We can just return the original vector.
if (SubVec.isUndef())
return Vec;
if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
return Op;
MVT OpVT = Op.getSimpleValueType();
unsigned NumElems = OpVT.getVectorNumElements();
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
// Extend to natively supported kshift.
MVT WideOpVT = OpVT;
if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
// Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
// if necessary.
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
// May need to promote to a legal type.
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
DAG.getConstant(0, dl, WideOpVT),
SubVec, Idx);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
MVT SubVecVT = SubVec.getSimpleValueType();
unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
assert(IdxVal + SubVecNumElems <= NumElems &&
IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR");
SDValue Undef = DAG.getUNDEF(WideOpVT);
if (IdxVal == 0) {
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
DAG.getConstant(0, dl, WideOpVT),
SubVec, ZeroIdx);
Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
Undef, SubVec, ZeroIdx);
if (Vec.isUndef()) {
assert(IdxVal != 0 && "Unexpected index");
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
assert(IdxVal != 0 && "Unexpected index");
// If upper elements of Vec are known undef, then just shift into place.
if (llvm::all_of(Vec->ops().slice(IdxVal + SubVecNumElems),
[](SDValue V) { return V.isUndef(); })) {
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
} else {
NumElems = WideOpVT.getVectorNumElements();
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
if (ShiftRight != 0)
SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
}
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
// Simple case when we put the subvector in the upper part.
if (IdxVal + SubVecNumElems == NumElems) {
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
if (SubVecNumElems * 2 == NumElems) {
// Special case, use legal zero extending insert_subvector. This allows
// isel to optimize when bits are known zero.
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
DAG.getConstant(0, dl, WideOpVT),
Vec, ZeroIdx);
} else {
// Otherwise use explicit shifts to zero the bits.
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
Undef, Vec, ZeroIdx);
NumElems = WideOpVT.getVectorNumElements();
SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
}
Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
// Inserting into the middle is more complicated.
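// Worked example: with WideOpVT == v8i1, inserting a v2i1 subvector at
// IdxVal == 2 gives ShiftLeft == 6 and ShiftRight == 4, and on the fast path
// below Mask0 == 0b11110011, clearing only the two destination bits.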
NumElems = WideOpVT.getVectorNumElements();
// Widen the vector if needed.
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
// Do an optimization for the most frequently used types.
if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) {
APInt Mask0 = APInt::getBitsSet(NumElems, IdxVal, IdxVal + SubVecNumElems);
Mask0.flipAllBits();
SDValue CMask0 = DAG.getConstant(Mask0, dl, MVT::getIntegerVT(NumElems));
SDValue VMask0 = DAG.getNode(ISD::BITCAST, dl, WideOpVT, CMask0);
Vec = DAG.getNode(ISD::AND, dl, WideOpVT, Vec, VMask0);
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
// Reduce to original width if needed.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
// Clear the upper bits of the subvector and move it to its insert position.
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
// Isolate the bits below the insertion point.
unsigned LowShift = NumElems - IdxVal;
SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec,
DAG.getTargetConstant(LowShift, dl, MVT::i8));
Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low,
DAG.getTargetConstant(LowShift, dl, MVT::i8));
// Isolate the bits after the last inserted bit.
unsigned HighShift = IdxVal + SubVecNumElems;
SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
DAG.getTargetConstant(HighShift, dl, MVT::i8));
High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,
DAG.getTargetConstant(HighShift, dl, MVT::i8));
// Now OR all 3 pieces together.
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);
SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec);
// Reduce to original width if needed.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG,
const SDLoc &dl) {
assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch");
EVT SubVT = V1.getValueType();
EVT SubSVT = SubVT.getScalarType();
unsigned SubNumElts = SubVT.getVectorNumElements();
unsigned SubVectorWidth = SubVT.getSizeInBits();
EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts);
SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth);
return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth);
}
/// Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
/// Then bitcast to their original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Expected a 128/256/512-bit vector type");
APInt Ones = APInt::getAllOnes(32);
unsigned NumElts = VT.getSizeInBits() / 32;
SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
return DAG.getBitcast(VT, Vec);
}
static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue In, SelectionDAG &DAG) {
EVT InVT = In.getValueType();
assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
ISD::ZERO_EXTEND == Opcode) &&
"Unknown extension opcode");
// For 256-bit vectors, we only need the lower (128-bit) input half.
// For 512-bit vectors, we only need the lower input half or quarter.
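// For example, extending v32i8 to v8i32 first extracts the low v16i8 half
// (Scale == 4, so max(128, 256 / 4) == 128 bits), then emits a
// *_EXTEND_VECTOR_INREG of that half.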
if (InVT.getSizeInBits() > 128) {
assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
"Expected VTs to be the same size!");
unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
In = extractSubVector(In, 0, DAG, DL,
std::max(128U, (unsigned)VT.getSizeInBits() / Scale));
InVT = In.getValueType();
}
if (VT.getVectorNumElements() != InVT.getVectorNumElements())
Opcode = DAG.getOpcode_EXTEND_VECTOR_INREG(Opcode);
return DAG.getNode(Opcode, DL, VT, In);
}
// Match (xor X, -1) -> X.
// Match extract_subvector(xor X, -1) -> extract_subvector(X).
// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
V = peekThroughBitcasts(V);
if (V.getOpcode() == ISD::XOR &&
(ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
isAllOnesConstant(V.getOperand(1))))
return V.getOperand(0);
if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
(isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
Not, V.getOperand(1));
}
}
SmallVector<SDValue, 2> CatOps;
if (collectConcatOps(V.getNode(), CatOps, DAG)) {
for (SDValue &CatOp : CatOps) {
SDValue NotCat = IsNOT(CatOp, DAG);
if (!NotCat) return SDValue();
CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
}
return SDValue();
}
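/// Create the unpacklo/unpackhi shuffle mask for VT. For example, a binary
/// v8i16 unpacklo (Lo, !Unary) produces <0, 8, 1, 9, 2, 10, 3, 11>, and the
/// unpackhi form produces <4, 12, 5, 13, 6, 14, 7, 15>.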
void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask,
bool Lo, bool Unary) {
assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 &&
"Illegal vector type to unpack");
assert(Mask.empty() && "Expected an empty shuffle mask vector");
int NumElts = VT.getVectorNumElements();
int NumEltsInLane = 128 / VT.getScalarSizeInBits();
for (int i = 0; i < NumElts; ++i) {
unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
int Pos = (i % NumEltsInLane) / 2 + LaneStart;
Pos += (Unary ? 0 : NumElts * (i % 2));
Pos += (Lo ? 0 : NumEltsInLane / 2);
Mask.push_back(Pos);
}
}
/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
/// imposed by AVX and specific to the unary pattern. Example:
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
bool Lo) {
assert(Mask.empty() && "Expected an empty shuffle mask vector");
int NumElts = VT.getVectorNumElements();
for (int i = 0; i < NumElts; ++i) {
int Pos = i / 2;
Pos += (Lo ? 0 : NumElts / 2);
Mask.push_back(Pos);
}
}
// Attempt to constant fold, else just create a VECTOR_SHUFFLE.
static SDValue getVectorShuffle(SelectionDAG &DAG, EVT VT, const SDLoc &dl,
SDValue V1, SDValue V2, ArrayRef<int> Mask) {
if ((ISD::isBuildVectorOfConstantSDNodes(V1.getNode()) || V1.isUndef()) &&
(ISD::isBuildVectorOfConstantSDNodes(V2.getNode()) || V2.isUndef())) {
SmallVector<SDValue> Ops(Mask.size(), DAG.getUNDEF(VT.getScalarType()));
for (int I = 0, NumElts = Mask.size(); I != NumElts; ++I) {
int M = Mask[I];
if (M < 0)
continue;
SDValue V = (M < NumElts) ? V1 : V2;
if (V.isUndef())
continue;
Ops[I] = V.getOperand(M % NumElts);
}
return DAG.getBuildVector(VT, dl, Ops);
}
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
return getVectorShuffle(DAG, VT, dl, V1, V2, Mask);
}
/// Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
return getVectorShuffle(DAG, VT, dl, V1, V2, Mask);
}
/// Returns a node that packs the LHS + RHS nodes together at half width.
/// May return X86ISD::PACKSS/PACKUS, packing the top/bottom half.
/// TODO: Add subvector splitting if/when we have a need for it.
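/// For example, packing two v8i16 operands into a v16i8 result selects the
/// low (PackHiHalf == false) or high byte of each i16 element, giving
/// <L0..L7, R0..R7>.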
static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS,
bool PackHiHalf = false) {
MVT OpVT = LHS.getSimpleValueType();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool UsePackUS = Subtarget.hasSSE41() || EltSizeInBits == 8;
assert(OpVT == RHS.getSimpleValueType() &&
VT.getSizeInBits() == OpVT.getSizeInBits() &&
(EltSizeInBits * 2) == OpVT.getScalarSizeInBits() &&
"Unexpected PACK operand types");
assert((EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) &&
"Unexpected PACK result type");
// Rely on vector shuffles for vXi64 -> vXi32 packing.
if (EltSizeInBits == 32) {
SmallVector<int> PackMask;
int Offset = PackHiHalf ? 1 : 0;
int NumElts = VT.getVectorNumElements();
for (int I = 0; I != NumElts; I += 4) {
PackMask.push_back(I + Offset);
PackMask.push_back(I + Offset + 2);
PackMask.push_back(I + Offset + NumElts);
PackMask.push_back(I + Offset + NumElts + 2);
}
return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, LHS),
DAG.getBitcast(VT, RHS), PackMask);
}
// See if we already have sufficient leading bits for PACKSS/PACKUS.
if (!PackHiHalf) {
if (UsePackUS &&
DAG.computeKnownBits(LHS).countMaxActiveBits() <= EltSizeInBits &&
DAG.computeKnownBits(RHS).countMaxActiveBits() <= EltSizeInBits)
return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);
if (DAG.ComputeMaxSignificantBits(LHS) <= EltSizeInBits &&
DAG.ComputeMaxSignificantBits(RHS) <= EltSizeInBits)
return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);
}
// Fallback to sign/zero extending the requested half and pack.
SDValue Amt = DAG.getTargetConstant(EltSizeInBits, dl, MVT::i8);
if (UsePackUS) {
if (PackHiHalf) {
LHS = DAG.getNode(X86ISD::VSRLI, dl, OpVT, LHS, Amt);
RHS = DAG.getNode(X86ISD::VSRLI, dl, OpVT, RHS, Amt);
} else {
SDValue Mask = DAG.getConstant((1ULL << EltSizeInBits) - 1, dl, OpVT);
LHS = DAG.getNode(ISD::AND, dl, OpVT, LHS, Mask);
RHS = DAG.getNode(ISD::AND, dl, OpVT, RHS, Mask);
}
return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);
}
if (!PackHiHalf) {
LHS = DAG.getNode(X86ISD::VSHLI, dl, OpVT, LHS, Amt);
RHS = DAG.getNode(X86ISD::VSHLI, dl, OpVT, RHS, Amt);
}
LHS = DAG.getNode(X86ISD::VSRAI, dl, OpVT, LHS, Amt);
RHS = DAG.getNode(X86ISD::VSRAI, dl, OpVT, RHS, Amt);
return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);
}
/// Return a vector_shuffle of the specified vector and a zero or undef vector.
/// This produces a shuffle where the low element of V2 is swizzled into the
/// zero/undef vector, landing at element Idx.
/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
bool IsZero,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = V2.getSimpleValueType();
SDValue V1 = IsZero
? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
int NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec(NumElems);
for (int i = 0; i != NumElems; ++i)
// If this is the insertion idx, put the low elt of V2 here.
MaskVec[i] = (i == Idx) ? NumElems : i;
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) {
if (Ptr.getOpcode() == X86ISD::Wrapper ||
Ptr.getOpcode() == X86ISD::WrapperRIP)
Ptr = Ptr.getOperand(0);
auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
return nullptr;
return CNode->getConstVal();
}
static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
if (!Load || !ISD::isNormalLoad(Load))
return nullptr;
return getTargetConstantFromBasePtr(Load->getBasePtr());
}
static const Constant *getTargetConstantFromNode(SDValue Op) {
Op = peekThroughBitcasts(Op);
return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op));
}
const Constant *
X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
assert(LD && "Unexpected null LoadSDNode");
return getTargetConstantFromNode(LD);
}
// Extract raw constant bits from constant pools.
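// For example, querying a v4i32 constant <1, 2, 3, 4> with
// EltSizeInBits == 64 yields the two i64 elements 0x0000000200000001 and
// 0x0000000400000003 (elements are packed little-endian).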
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
APInt &UndefElts,
SmallVectorImpl<APInt> &EltBits,
bool AllowWholeUndefs = true,
bool AllowPartialUndefs = true) {
assert(EltBits.empty() && "Expected an empty EltBits vector");
Op = peekThroughBitcasts(Op);
EVT VT = Op.getValueType();
unsigned SizeInBits = VT.getSizeInBits();
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
unsigned NumElts = SizeInBits / EltSizeInBits;
// Bitcast a source array of element bits to the target size.
auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
unsigned NumSrcElts = UndefSrcElts.getBitWidth();
unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
"Constant bit sizes don't match");
// Don't split if we don't allow undef bits.
bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
if (UndefSrcElts.getBoolValue() && !AllowUndefs)
return false;
// If we're already the right size, don't bother bitcasting.
if (NumSrcElts == NumElts) {
UndefElts = UndefSrcElts;
EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
return true;
}
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(SizeInBits, 0);
APInt MaskBits(SizeInBits, 0);
for (unsigned i = 0; i != NumSrcElts; ++i) {
unsigned BitOffset = i * SrcEltSizeInBits;
if (UndefSrcElts[i])
UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
MaskBits.insertBits(SrcEltBits[i], BitOffset);
}
// Split the undef/constant single bitset data into the target elements.
UndefElts = APInt(NumElts, 0);
EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
for (unsigned i = 0; i != NumElts; ++i) {
unsigned BitOffset = i * EltSizeInBits;
APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
// Only treat an element as UNDEF if all bits are UNDEF.
if (UndefEltBits.isAllOnes()) {
if (!AllowWholeUndefs)
return false;
UndefElts.setBit(i);
continue;
}
// If only some bits are UNDEF then treat them as zero (or bail if not
// supported).
if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
return false;
EltBits[i] = MaskBits.extractBits(EltSizeInBits, BitOffset);
}
return true;
};
// Collect constant bits and insert into mask/undef bit masks.
auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
unsigned UndefBitIndex) {
if (!Cst)
return false;
if (isa<UndefValue>(Cst)) {
Undefs.setBit(UndefBitIndex);
return true;
}
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
Mask = CInt->getValue();
return true;
}
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
Mask = CFP->getValueAPF().bitcastToAPInt();
return true;
}
return false;
};
// Handle UNDEFs.
if (Op.isUndef()) {
APInt UndefSrcElts = APInt::getAllOnes(NumElts);
SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract scalar constant bits.
if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt UndefSrcElts = APInt::getZero(1);
SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
return CastBitData(UndefSrcElts, SrcEltBits);
}
if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt UndefSrcElts = APInt::getZero(1);
APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
SmallVector<APInt, 64> SrcEltBits(1, RawBits);
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from build vector.
if (auto *BV = dyn_cast<BuildVectorSDNode>(Op)) {
BitVector Undefs;
SmallVector<APInt> SrcEltBits;
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
if (BV->getConstantRawBits(true, SrcEltSizeInBits, SrcEltBits, Undefs)) {
APInt UndefSrcElts = APInt::getZero(SrcEltBits.size());
for (unsigned I = 0, E = SrcEltBits.size(); I != E; ++I)
if (Undefs[I])
UndefSrcElts.setBit(I);
return CastBitData(UndefSrcElts, SrcEltBits);
}
}
// Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
return false;
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
for (unsigned i = 0; i != NumSrcElts; ++i)
if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
UndefSrcElts, i))
return false;
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from a broadcasted constant pool scalar.
if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD &&
EltSizeInBits <= VT.getScalarSizeInBits()) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits())
return false;
SDValue Ptr = MemIntr->getBasePtr();
if (const Constant *C = getTargetConstantFromBasePtr(Ptr)) {
unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) {
if (UndefSrcElts[0])
UndefSrcElts.setBits(0, NumSrcElts);
SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
return CastBitData(UndefSrcElts, SrcEltBits);
}
}
}
// Extract constant bits from a subvector broadcast.
if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDValue Ptr = MemIntr->getBasePtr();
// The source constant may be larger than the subvector broadcast, so
// ensure we extract the correct subvector constants.
if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) {
Type *CstTy = Cst->getType();
unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
(SizeInBits % SubVecSizeInBits) != 0)
return false;
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
APInt UndefSubElts(NumSubElts, 0);
SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs,
APInt(CstEltSizeInBits, 0));
for (unsigned i = 0; i != NumSubElts; ++i) {
if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],
UndefSubElts, i))
return false;
for (unsigned j = 1; j != NumSubVecs; ++j)
SubEltBits[i + (j * NumSubElts)] = SubEltBits[i];
}
UndefSubElts = APInt::getSplat(NumSubVecs * UndefSubElts.getBitWidth(),
UndefSubElts);
return CastBitData(UndefSubElts, SubEltBits);
}
}
// Extract a rematerialized scalar constant insertion.
if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits;
auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Insert constant bits from a base and sub vector sources.
if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) {
// If we bitcast to larger elements we might lose track of undefs, so to
// be safe don't allow any.
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
bool AllowUndefs = EltSizeInBits >= SrcEltSizeInBits;
APInt UndefSrcElts, UndefSubElts;
SmallVector<APInt, 32> EltSrcBits, EltSubBits;
if (getTargetConstantBitsFromNode(Op.getOperand(1), SrcEltSizeInBits,
UndefSubElts, EltSubBits,
AllowWholeUndefs && AllowUndefs,
AllowPartialUndefs && AllowUndefs) &&
getTargetConstantBitsFromNode(Op.getOperand(0), SrcEltSizeInBits,
UndefSrcElts, EltSrcBits,
AllowWholeUndefs && AllowUndefs,
AllowPartialUndefs && AllowUndefs)) {
unsigned BaseIdx = Op.getConstantOperandVal(2);
UndefSrcElts.insertBits(UndefSubElts, BaseIdx);
for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
EltSrcBits[BaseIdx + i] = EltSubBits[i];
return CastBitData(UndefSrcElts, EltSrcBits);
}
}
// Extract constant bits from a subvector's source.
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
// TODO - support extract_subvector through bitcasts.
if (EltSizeInBits != VT.getScalarSizeInBits())
return false;
if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
UndefElts, EltBits, AllowWholeUndefs,
AllowPartialUndefs)) {
EVT SrcVT = Op.getOperand(0).getValueType();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned NumSubElts = VT.getVectorNumElements();
unsigned BaseIdx = Op.getConstantOperandVal(1);
UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
if ((BaseIdx + NumSubElts) != NumSrcElts)
EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
if (BaseIdx != 0)
EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
return true;
}
}
// Extract constant bits from shuffle node sources.
if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) {
// TODO - support shuffle through bitcasts.
if (EltSizeInBits != VT.getScalarSizeInBits())
return false;
ArrayRef<int> Mask = SVN->getMask();
if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
llvm::any_of(Mask, [](int M) { return M < 0; }))
return false;
APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1;
if (isAnyInRange(Mask, 0, NumElts) &&
!getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
UndefElts0, EltBits0, AllowWholeUndefs,
AllowPartialUndefs))
return false;
if (isAnyInRange(Mask, NumElts, 2 * NumElts) &&
!getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
UndefElts1, EltBits1, AllowWholeUndefs,
AllowPartialUndefs))
return false;
UndefElts = APInt::getZero(NumElts);
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
if (M < 0) {
UndefElts.setBit(i);
EltBits.push_back(APInt::getZero(EltSizeInBits));
} else if (M < (int)NumElts) {
if (UndefElts0[M])
UndefElts.setBit(i);
EltBits.push_back(EltBits0[M]);
} else {
if (UndefElts1[M - NumElts])
UndefElts.setBit(i);
EltBits.push_back(EltBits1[M - NumElts]);
}
}
return true;
}
return false;
}
namespace llvm {
namespace X86 {
bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs) {
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
UndefElts, EltBits, true,
AllowPartialUndefs)) {
int SplatIndex = -1;
for (int i = 0, e = EltBits.size(); i != e; ++i) {
if (UndefElts[i])
continue;
if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) {
SplatIndex = -1;
break;
}
SplatIndex = i;
}
if (0 <= SplatIndex) {
SplatVal = EltBits[SplatIndex];
return true;
}
}
return false;
}
} // namespace X86
} // namespace llvm
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
SmallVectorImpl<uint64_t> &RawMask,
APInt &UndefElts) {
// Extract the raw target constant bits.
SmallVector<APInt, 64> EltBits;
if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
EltBits, /* AllowWholeUndefs */ true,
/* AllowPartialUndefs */ false))
return false;
// Insert the extracted elements into the mask.
for (const APInt &Elt : EltBits)
RawMask.push_back(Elt.getZExtValue());
return true;
}
/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
/// A multi-stage pack shuffle mask is created by specifying NumStages > 1.
/// Note: This ignores saturation, so inputs must be checked first.
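/// For example, a unary single-stage v16i8 pack produces
/// <0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14>; the binary form
/// replaces the second half with <16, 18, ..., 30>.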
static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
bool Unary, unsigned NumStages = 1) {
assert(Mask.empty() && "Expected an empty shuffle mask vector");
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();
unsigned Offset = Unary ? 0 : NumElts;
unsigned Repetitions = 1u << (NumStages - 1);
unsigned Increment = 1u << NumStages;
assert((NumEltsPerLane >> NumStages) > 0 && "Illegal packing compaction");
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
for (unsigned Stage = 0; Stage != Repetitions; ++Stage) {
for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
Mask.push_back(Elt + (Lane * NumEltsPerLane));
for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
}
}
}
// Split the demanded elts of a PACKSS/PACKUS node between its operands.
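// For example, for a v16i8 pack (one 128-bit lane), demanded result element
// 3 maps to LHS element 3, while demanded element 11 maps to RHS element 3.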
static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
APInt &DemandedLHS, APInt &DemandedRHS) {
int NumLanes = VT.getSizeInBits() / 128;
int NumElts = DemandedElts.getBitWidth();
int NumInnerElts = NumElts / 2;
int NumEltsPerLane = NumElts / NumLanes;
int NumInnerEltsPerLane = NumInnerElts / NumLanes;
DemandedLHS = APInt::getZero(NumInnerElts);
DemandedRHS = APInt::getZero(NumInnerElts);
// Map DemandedElts to the packed operands.
for (int Lane = 0; Lane != NumLanes; ++Lane) {
for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) {
int OuterIdx = (Lane * NumEltsPerLane) + Elt;
int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt;
if (DemandedElts[OuterIdx])
DemandedLHS.setBit(InnerIdx);
if (DemandedElts[OuterIdx + NumInnerEltsPerLane])
DemandedRHS.setBit(InnerIdx);
}
}
}
// Split the demanded elts of a HADD/HSUB node between its operands.
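// For example, for a v8i32 HADD (two 128-bit lanes), demanded result element
// 1 requires LHS elements 2 and 3, while demanded element 6 requires RHS
// elements 4 and 5.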
static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
APInt &DemandedLHS, APInt &DemandedRHS) {
int NumLanes = VT.getSizeInBits() / 128;
int NumElts = DemandedElts.getBitWidth();
int NumEltsPerLane = NumElts / NumLanes;
int HalfEltsPerLane = NumEltsPerLane / 2;
DemandedLHS = APInt::getZero(NumElts);
DemandedRHS = APInt::getZero(NumElts);
// Map DemandedElts to the horizontal operands.
for (int Idx = 0; Idx != NumElts; ++Idx) {
if (!DemandedElts[Idx])
continue;
int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
int LocalIdx = Idx % NumEltsPerLane;
if (LocalIdx < HalfEltsPerLane) {
DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0);
DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1);
} else {
LocalIdx -= HalfEltsPerLane;
DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0);
DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1);
}
}
}
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
/// operands in \p Ops, and returns true.
/// Sets \p IsUnary to true if only one source is used. Note that this will set
/// IsUnary for shuffles which use a single input multiple times, and in those
/// cases it will adjust the mask to only have indices within that single input.
/// It is an error to call this with non-empty Mask/Ops vectors.
static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<SDValue> &Ops,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
unsigned MaskEltSize = VT.getScalarSizeInBits();
SmallVector<uint64_t, 32> RawMask;
APInt RawUndefs;
uint64_t ImmN;
assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector");
IsUnary = false;
bool IsFakeUnary = false;
switch (N->getOpcode()) {
case X86ISD::BLENDI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodeBLENDMask(NumElems, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodeSHUFPMask(NumElems, MaskEltSize, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::INSERTPS:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodeINSERTPSMask(ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::EXTRQI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
if (isa<ConstantSDNode>(N->getOperand(1)) &&
isa<ConstantSDNode>(N->getOperand(2))) {
int BitLen = N->getConstantOperandVal(1);
int BitIdx = N->getConstantOperandVal(2);
DecodeEXTRQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
IsUnary = true;
}
break;
case X86ISD::INSERTQI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
if (isa<ConstantSDNode>(N->getOperand(2)) &&
isa<ConstantSDNode>(N->getOperand(3))) {
int BitLen = N->getConstantOperandVal(2);
int BitIdx = N->getConstantOperandVal(3);
DecodeINSERTQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
}
break;
case X86ISD::UNPCKH:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
DecodeUNPCKHMask(NumElems, MaskEltSize, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
DecodeUNPCKLMask(NumElems, MaskEltSize, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
DecodeMOVHLPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVLHPS:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
DecodeMOVLHPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::VALIGN:
assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
"Only 32-bit and 64-bit elements are supported!");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodeVALIGNMask(NumElems, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
Ops.push_back(N->getOperand(1));
Ops.push_back(N->getOperand(0));
break;
case X86ISD::PALIGNR:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodePALIGNRMask(NumElems, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
Ops.push_back(N->getOperand(1));
Ops.push_back(N->getOperand(0));
break;
case X86ISD::VSHLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodePSLLDQMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::VSRLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodePSRLDQMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodePSHUFMask(NumElems, MaskEltSize, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodePSHUFHWMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodePSHUFLWMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::VZEXT_MOVL:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
DecodeZeroMoveLowMask(NumElems, Mask);
IsUnary = true;
break;
case X86ISD::VBROADCAST:
// We only decode broadcasts of same-sized vectors; peeking through to
// extracted subvectors is likely to cause hasOneUse issues with
// SimplifyDemandedBits etc.
if (N->getOperand(0).getValueType() == VT) {
DecodeVectorBroadcast(NumElems, Mask);
IsUnary = true;
break;
}
return false;
case X86ISD::VPERMILPV: {
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
RawUndefs)) {
DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask);
break;
}
return false;
}
case X86ISD::PSHUFB: {
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
DecodePSHUFBMask(RawMask, RawUndefs, Mask);
break;
}
return false;
}
case X86ISD::VPERMI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodeVPERMMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::MOVSH:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
DecodeScalarMoveMask(NumElems, /* IsLoad */ false, Mask);
break;
case X86ISD::VPERM2X128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
DecodeVPERM2X128Mask(NumElems, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUF128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
DecodeMOVSLDUPMask(NumElems, Mask);
IsUnary = true;
break;
case X86ISD::MOVSHDUP:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
DecodeMOVSHDUPMask(NumElems, Mask);
IsUnary = true;
break;
case X86ISD::MOVDDUP:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
DecodeMOVDDUPMask(NumElems, Mask);
IsUnary = true;
break;
case X86ISD::VPERMIL2: {
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
SDValue MaskNode = N->getOperand(2);
SDValue CtrlNode = N->getOperand(3);
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
unsigned CtrlImm = CtrlOp->getZExtValue();
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
RawUndefs)) {
DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs,
Mask);
break;
}
}
return false;
}
case X86ISD::VPPERM: {
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
SDValue MaskNode = N->getOperand(2);
if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
DecodeVPPERMMask(RawMask, RawUndefs, Mask);
break;
}
return false;
}
case X86ISD::VPERMV: {
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = true;
// Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
Ops.push_back(N->getOperand(1));
SDValue MaskNode = N->getOperand(0);
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
RawUndefs)) {
DecodeVPERMVMask(RawMask, RawUndefs, Mask);
break;
}
return false;
}
case X86ISD::VPERMV3: {
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(2).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
// Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
RawUndefs)) {
DecodeVPERMV3Mask(RawMask, RawUndefs, Mask);
break;
}
return false;
}
default: llvm_unreachable("unknown target shuffle node");
}
// Empty mask indicates the decode failed.
if (Mask.empty())
return false;
// Check if we're getting a shuffle mask with zero'd elements.
if (!AllowSentinelZero && isAnyZero(Mask))
return false;
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
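// For instance (illustrative): a 4-element UNPCKL mask {0, 4, 1, 5} whose two
// operands are the same node is remapped to {0, 0, 1, 1}.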
if (IsFakeUnary)
for (int &M : Mask)
if (M >= (int)Mask.size())
M -= Mask.size();
// If we didn't already add operands in the opcode-specific code, default to
// adding 1 or 2 operands starting at 0.
if (Ops.empty()) {
Ops.push_back(N->getOperand(0));
if (!IsUnary || IsFakeUnary)
Ops.push_back(N->getOperand(1));
}
return true;
}
// Wrapper for getTargetShuffleMask with IsUnary.
static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<SDValue> &Ops,
SmallVectorImpl<int> &Mask) {
bool IsUnary;
return getTargetShuffleMask(N, VT, AllowSentinelZero, Ops, Mask, IsUnary);
}
/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Either it is an undef element in the shuffle mask, the element of the input
/// referenced is undef, or the element of the input referenced is known to be
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
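/// For instance (illustrative): with Mask = {0, 5, -1, 2} on 4-element inputs
/// where V2 is an all-zeros build_vector, element 1 references the zero input
/// (KnownZero bit 1 is set) and element 2 is undef in the mask (KnownUndef
/// bit 2 is set).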
static void computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2,
APInt &KnownUndef, APInt &KnownZero) {
int Size = Mask.size();
KnownUndef = KnownZero = APInt::getZero(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
int VectorSizeInBits = V1.getValueSizeInBits();
int ScalarSizeInBits = VectorSizeInBits / Size;
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
for (int i = 0; i < Size; ++i) {
int M = Mask[i];
// Handle the easy cases.
if (M < 0) {
KnownUndef.setBit(i);
continue;
}
if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
KnownZero.setBit(i);
continue;
}
// Determine shuffle input and normalize the mask.
SDValue V = M < Size ? V1 : V2;
M %= Size;
// Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
if (V.getOpcode() != ISD::BUILD_VECTOR)
continue;
// If the BUILD_VECTOR has fewer elements then the bitcasted portion of
// the (larger) source element must be UNDEF/ZERO.
if ((Size % V.getNumOperands()) == 0) {
int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale);
if (Op.isUndef())
KnownUndef.setBit(i);
if (X86::isZeroNode(Op))
KnownZero.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
if (Val == 0)
KnownZero.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
if (Val == 0)
KnownZero.setBit(i);
}
continue;
}
// If the BUILD_VECTOR has more elements then all the (smaller) source
// elements must be UNDEF or ZERO.
if ((V.getNumOperands() % Size) == 0) {
int Scale = V->getNumOperands() / Size;
bool AllUndef = true;
bool AllZero = true;
for (int j = 0; j < Scale; ++j) {
SDValue Op = V.getOperand((M * Scale) + j);
AllUndef &= Op.isUndef();
AllZero &= X86::isZeroNode(Op);
}
if (AllUndef)
KnownUndef.setBit(i);
if (AllZero)
KnownZero.setBit(i);
continue;
}
}
}
/// Decode a target shuffle mask and inputs and see if any values are
/// known to be undef or zero from their inputs.
/// Returns true if the target shuffle mask was decoded.
/// FIXME: Merge this with computeZeroableShuffleElements?
static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
APInt &KnownUndef, APInt &KnownZero) {
bool IsUnary;
if (!isTargetShuffle(N.getOpcode()))
return false;
MVT VT = N.getSimpleValueType();
if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
return false;
int Size = Mask.size();
SDValue V1 = Ops[0];
SDValue V2 = IsUnary ? V1 : Ops[1];
KnownUndef = KnownZero = APInt::getZero(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
assert((VT.getSizeInBits() % Size) == 0 &&
"Illegal split of shuffle value type");
unsigned EltSizeInBits = VT.getSizeInBits() / Size;
// Extract known constant input data.
APInt UndefSrcElts[2];
SmallVector<APInt, 32> SrcEltBits[2];
bool IsSrcConstant[2] = {
getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
SrcEltBits[0], true, false),
getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
SrcEltBits[1], true, false)};
for (int i = 0; i < Size; ++i) {
int M = Mask[i];
// Already decoded as SM_SentinelZero / SM_SentinelUndef.
if (M < 0) {
assert(isUndefOrZero(M) && "Unknown shuffle sentinel value!");
if (SM_SentinelUndef == M)
KnownUndef.setBit(i);
if (SM_SentinelZero == M)
KnownZero.setBit(i);
continue;
}
// Determine shuffle input and normalize the mask.
unsigned SrcIdx = M / Size;
SDValue V = M < Size ? V1 : V2;
M %= Size;
// We are referencing an UNDEF input.
if (V.isUndef()) {
KnownUndef.setBit(i);
continue;
}
// SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
// TODO: We currently only set UNDEF for integer types - floats use the same
// registers as vectors and many of the scalar folded loads rely on the
// SCALAR_TO_VECTOR pattern.
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Size % V.getValueType().getVectorNumElements()) == 0) {
int Scale = Size / V.getValueType().getVectorNumElements();
int Idx = M / Scale;
if (Idx != 0 && !VT.isFloatingPoint())
KnownUndef.setBit(i);
else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
KnownZero.setBit(i);
continue;
}
// INSERT_SUBVECTOR - to widen vectors we often insert them into UNDEF
// base vectors.
if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue Vec = V.getOperand(0);
int NumVecElts = Vec.getValueType().getVectorNumElements();
if (Vec.isUndef() && Size == NumVecElts) {
int Idx = V.getConstantOperandVal(2);
int NumSubElts = V.getOperand(1).getValueType().getVectorNumElements();
if (M < Idx || (Idx + NumSubElts) <= M)
KnownUndef.setBit(i);
}
continue;
}
// Attempt to extract from the source's constant bits.
if (IsSrcConstant[SrcIdx]) {
if (UndefSrcElts[SrcIdx][M])
KnownUndef.setBit(i);
else if (SrcEltBits[SrcIdx][M] == 0)
KnownZero.setBit(i);
}
}
assert(VT.getVectorNumElements() == (unsigned)Size &&
"Different mask size from vector size!");
return true;
}
// Replace target shuffle mask elements with known undef/zero sentinels.
static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask,
const APInt &KnownUndef,
const APInt &KnownZero,
bool ResolveKnownZeros = true) {
unsigned NumElts = Mask.size();
assert(KnownUndef.getBitWidth() == NumElts &&
KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch");
for (unsigned i = 0; i != NumElts; ++i) {
if (KnownUndef[i])
Mask[i] = SM_SentinelUndef;
else if (ResolveKnownZeros && KnownZero[i])
Mask[i] = SM_SentinelZero;
}
}
// Extract target shuffle mask sentinel elements to known undef/zero bitmasks.
static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask,
APInt &KnownUndef,
APInt &KnownZero) {
unsigned NumElts = Mask.size();
KnownUndef = KnownZero = APInt::getZero(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
int M = Mask[i];
if (SM_SentinelUndef == M)
KnownUndef.setBit(i);
if (SM_SentinelZero == M)
KnownZero.setBit(i);
}
}
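// Taken together, the two helpers above round-trip: e.g. (illustrative) the
// mask {0, SM_SentinelZero, SM_SentinelUndef, 3} yields KnownZero bit 1 and
// KnownUndef bit 2, and resolving those bits restores the same mask.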
// Attempt to create a shuffle mask from a VSELECT/BLENDV condition mask.
static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
SDValue Cond, bool IsBLENDV = false) {
EVT CondVT = Cond.getValueType();
unsigned EltSizeInBits = CondVT.getScalarSizeInBits();
unsigned NumElts = CondVT.getVectorNumElements();
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits,
true, false))
return false;
Mask.resize(NumElts, SM_SentinelUndef);
for (int i = 0; i != (int)NumElts; ++i) {
Mask[i] = i;
// Arbitrarily choose from the 2nd operand if the select condition element
// is undef.
// TODO: Can we do better by matching patterns such as even/odd?
if (UndefElts[i] || (!IsBLENDV && EltBits[i].isZero()) ||
(IsBLENDV && EltBits[i].isNonNegative()))
Mask[i] += NumElts;
}
return true;
}
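// For example (illustrative): a v4i32 VSELECT whose condition constant is
// <-1, 0, -1, 0> produces the blend mask {0, 5, 2, 7}, i.e. lanes 0 and 2
// from the first value operand and lanes 1 and 3 from the second.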
// Forward declaration (for getFauxShuffleMask recursive check).
static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
const SelectionDAG &DAG, unsigned Depth,
bool ResolveKnownElts);
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements than the
// destination value type.
// TODO: Merge into getTargetShuffleInputs()
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
const SelectionDAG &DAG, unsigned Depth,
bool ResolveKnownElts) {
Mask.clear();
Ops.clear();
MVT VT = N.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned NumSizeInBits = VT.getSizeInBits();
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0)
return false;
assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size");
unsigned NumSizeInBytes = NumSizeInBits / 8;
unsigned NumBytesPerElt = NumBitsPerElt / 8;
unsigned Opcode = N.getOpcode();
switch (Opcode) {
case ISD::VECTOR_SHUFFLE: {
// Don't treat ISD::VECTOR_SHUFFLE as a target shuffle so decode it here.
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(N)->getMask();
if (isUndefOrInRange(ShuffleMask, 0, 2 * NumElts)) {
Mask.append(ShuffleMask.begin(), ShuffleMask.end());
Ops.push_back(N.getOperand(0));
Ops.push_back(N.getOperand(1));
return true;
}
return false;
}
case ISD::AND:
case X86ISD::ANDNP: {
// Attempt to decode as a per-byte mask.
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
bool IsAndN = (X86ISD::ANDNP == Opcode);
uint64_t ZeroMask = IsAndN ? 255 : 0;
if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
return false;
// We can't assume an undef src element gives an undef dst - the other src
// might be zero.
if (!UndefElts.isZero())
return false;
for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
const APInt &ByteBits = EltBits[i];
if (ByteBits != 0 && ByteBits != 255)
return false;
Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
}
Ops.push_back(IsAndN ? N1 : N0);
return true;
}
case ISD::OR: {
// Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
// is a valid shuffle index.
SDValue N0 = peekThroughBitcasts(N.getOperand(0));
SDValue N1 = peekThroughBitcasts(N.getOperand(1));
if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
return false;
SmallVector<int, 64> SrcMask0, SrcMask1;
SmallVector<SDValue, 2> SrcInputs0, SrcInputs1;
APInt Demand0 = APInt::getAllOnes(N0.getValueType().getVectorNumElements());
APInt Demand1 = APInt::getAllOnes(N1.getValueType().getVectorNumElements());
if (!getTargetShuffleInputs(N0, Demand0, SrcInputs0, SrcMask0, DAG,
Depth + 1, true) ||
!getTargetShuffleInputs(N1, Demand1, SrcInputs1, SrcMask1, DAG,
Depth + 1, true))
return false;
size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
for (int i = 0; i != (int)MaskSize; ++i) {
// NOTE: Don't handle SM_SentinelUndef, as we can end up in infinite
// loops converting between OR and BLEND shuffles due to
// canWidenShuffleElements merging away undef elements, meaning we
// fail to recognise the OR as the undef element isn't known zero.
if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
Mask.push_back(SM_SentinelZero);
else if (Mask1[i] == SM_SentinelZero)
Mask.push_back(i);
else if (Mask0[i] == SM_SentinelZero)
Mask.push_back(i + MaskSize);
else
return false;
}
Ops.push_back(N0);
Ops.push_back(N1);
return true;
}
case ISD::INSERT_SUBVECTOR: {
SDValue Src = N.getOperand(0);
SDValue Sub = N.getOperand(1);
EVT SubVT = Sub.getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
if (!N->isOnlyUserOf(Sub.getNode()))
return false;
uint64_t InsertIdx = N.getConstantOperandVal(2);
// Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)).
if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Sub.getOperand(0).getValueType() == VT) {
uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
for (int i = 0; i != (int)NumElts; ++i)
Mask.push_back(i);
for (int i = 0; i != (int)NumSubElts; ++i)
Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
Ops.push_back(Src);
Ops.push_back(Sub.getOperand(0));
return true;
}
// Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
SmallVector<int, 64> SubMask;
SmallVector<SDValue, 2> SubInputs;
SDValue SubSrc = peekThroughOneUseBitcasts(Sub);
EVT SubSrcVT = SubSrc.getValueType();
if (!SubSrcVT.isVector())
return false;
APInt SubDemand = APInt::getAllOnes(SubSrcVT.getVectorNumElements());
if (!getTargetShuffleInputs(SubSrc, SubDemand, SubInputs, SubMask, DAG,
Depth + 1, ResolveKnownElts))
return false;
// Subvector shuffle inputs must not be larger than the subvector.
if (llvm::any_of(SubInputs, [SubVT](SDValue SubInput) {
return SubVT.getFixedSizeInBits() <
SubInput.getValueSizeInBits().getFixedValue();
}))
return false;
if (SubMask.size() != NumSubElts) {
assert(((SubMask.size() % NumSubElts) == 0 ||
(NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
if ((NumSubElts % SubMask.size()) == 0) {
int Scale = NumSubElts / SubMask.size();
SmallVector<int, 64> ScaledSubMask;
narrowShuffleMaskElts(Scale, SubMask, ScaledSubMask);
SubMask = ScaledSubMask;
} else {
int Scale = SubMask.size() / NumSubElts;
NumSubElts = SubMask.size();
NumElts *= Scale;
InsertIdx *= Scale;
}
}
Ops.push_back(Src);
Ops.append(SubInputs.begin(), SubInputs.end());
if (ISD::isBuildVectorAllZeros(Src.getNode()))
Mask.append(NumElts, SM_SentinelZero);
else
for (int i = 0; i != (int)NumElts; ++i)
Mask.push_back(i);
for (int i = 0; i != (int)NumSubElts; ++i) {
int M = SubMask[i];
if (0 <= M) {
int InputIdx = M / NumSubElts;
M = (NumElts * (1 + InputIdx)) + (M % NumSubElts);
}
Mask[i + InsertIdx] = M;
}
return true;
}
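// Worked example (illustrative): for a v8i32
// INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1, 0), 4) the decoded mask is
// {0, 1, 2, 3, 8, 9, 10, 11} with Ops = {SRC0, SRC1}.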
case X86ISD::PINSRB:
case X86ISD::PINSRW:
case ISD::SCALAR_TO_VECTOR:
case ISD::INSERT_VECTOR_ELT: {
// Match against an insert_vector_elt/scalar_to_vector of an extract from a
// vector, for matching src/dst vector types.
SDValue Scl = N.getOperand(Opcode == ISD::SCALAR_TO_VECTOR ? 0 : 1);
unsigned DstIdx = 0;
if (Opcode != ISD::SCALAR_TO_VECTOR) {
// Check we have an in-range constant insertion index.
if (!isa<ConstantSDNode>(N.getOperand(2)) ||
N.getConstantOperandAPInt(2).uge(NumElts))
return false;
DstIdx = N.getConstantOperandVal(2);
// Attempt to recognise an INSERT*(VEC, 0, DstIdx) shuffle pattern.
if (X86::isZeroNode(Scl)) {
Ops.push_back(N.getOperand(0));
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(i == DstIdx ? SM_SentinelZero : (int)i);
return true;
}
}
// Peek through trunc/aext/zext.
// TODO: aext shouldn't require SM_SentinelZero padding.
// TODO: handle shift of scalars.
unsigned MinBitsPerElt = Scl.getScalarValueSizeInBits();
while (Scl.getOpcode() == ISD::TRUNCATE ||
Scl.getOpcode() == ISD::ANY_EXTEND ||
Scl.getOpcode() == ISD::ZERO_EXTEND) {
Scl = Scl.getOperand(0);
MinBitsPerElt =
std::min<unsigned>(MinBitsPerElt, Scl.getScalarValueSizeInBits());
}
if ((MinBitsPerElt % 8) != 0)
return false;
// Attempt to find the source vector the scalar was extracted from.
SDValue SrcExtract;
if ((Scl.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Scl.getOpcode() == X86ISD::PEXTRW ||
Scl.getOpcode() == X86ISD::PEXTRB) &&
Scl.getOperand(0).getValueSizeInBits() == NumSizeInBits) {
SrcExtract = Scl;
}
if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
return false;
SDValue SrcVec = SrcExtract.getOperand(0);
EVT SrcVT = SrcVec.getValueType();
if (!SrcVT.getScalarType().isByteSized())
return false;
unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
unsigned SrcByte = SrcIdx * (SrcVT.getScalarSizeInBits() / 8);
unsigned DstByte = DstIdx * NumBytesPerElt;
MinBitsPerElt =
std::min<unsigned>(MinBitsPerElt, SrcVT.getScalarSizeInBits());
// Create 'identity' byte level shuffle mask and then add inserted bytes.
if (Opcode == ISD::SCALAR_TO_VECTOR) {
Ops.push_back(SrcVec);
Mask.append(NumSizeInBytes, SM_SentinelUndef);
} else {
Ops.push_back(SrcVec);
Ops.push_back(N.getOperand(0));
for (int i = 0; i != (int)NumSizeInBytes; ++i)
Mask.push_back(NumSizeInBytes + i);
}
unsigned MinBytesPerElts = MinBitsPerElt / 8;
MinBytesPerElts = std::min(MinBytesPerElts, NumBytesPerElt);
for (unsigned i = 0; i != MinBytesPerElts; ++i)
Mask[DstByte + i] = SrcByte + i;
for (unsigned i = MinBytesPerElts; i < NumBytesPerElt; ++i)
Mask[DstByte + i] = SM_SentinelZero;
return true;
}
case X86ISD::PACKSS:
case X86ISD::PACKUS: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
"Unexpected input value type");
APInt EltsLHS, EltsRHS;
getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS);
// If we know input saturation won't happen (or we don't care for particular
// lanes), we can treat this as a truncation shuffle.
bool Offset0 = false, Offset1 = false;
if (Opcode == X86ISD::PACKSS) {
if ((!(N0.isUndef() || EltsLHS.isZero()) &&
DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) ||
(!(N1.isUndef() || EltsRHS.isZero()) &&
DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt))
return false;
// We can't easily fold ASHR into a shuffle, but if it was feeding a
// PACKSS then it was likely being used for sign-extension for a
// truncation, so just peek through and adjust the mask accordingly.
if (N0.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N0.getNode()) &&
N0.getConstantOperandAPInt(1) == NumBitsPerElt) {
Offset0 = true;
N0 = N0.getOperand(0);
}
if (N1.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N1.getNode()) &&
N1.getConstantOperandAPInt(1) == NumBitsPerElt) {
Offset1 = true;
N1 = N1.getOperand(0);
}
} else {
APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
if ((!(N0.isUndef() || EltsLHS.isZero()) &&
!DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) ||
(!(N1.isUndef() || EltsRHS.isZero()) &&
!DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1)))
return false;
}
bool IsUnary = (N0 == N1);
Ops.push_back(N0);
if (!IsUnary)
Ops.push_back(N1);
createPackShuffleMask(VT, Mask, IsUnary);
if (Offset0 || Offset1) {
for (int &M : Mask)
if ((Offset0 && isInRange(M, 0, NumElts)) ||
(Offset1 && isInRange(M, NumElts, 2 * NumElts)))
++M;
}
return true;
}
case ISD::VSELECT:
case X86ISD::BLENDV: {
SDValue Cond = N.getOperand(0);
if (createShuffleMaskFromVSELECT(Mask, Cond, Opcode == X86ISD::BLENDV)) {
Ops.push_back(N.getOperand(1));
Ops.push_back(N.getOperand(2));
return true;
}
return false;
}
case X86ISD::VTRUNC: {
SDValue Src = N.getOperand(0);
EVT SrcVT = Src.getValueType();
// Truncated source must be a simple vector.
if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
(SrcVT.getScalarSizeInBits() % 8) != 0)
return false;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned NumBitsPerSrcElt = SrcVT.getScalarSizeInBits();
unsigned Scale = NumBitsPerSrcElt / NumBitsPerElt;
assert((NumBitsPerSrcElt % NumBitsPerElt) == 0 && "Illegal truncation");
for (unsigned i = 0; i != NumSrcElts; ++i)
Mask.push_back(i * Scale);
Mask.append(NumElts - NumSrcElts, SM_SentinelZero);
Ops.push_back(Src);
return true;
}
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
uint64_t ShiftVal = N.getConstantOperandVal(1);
// Out of range bit shifts are guaranteed to be zero.
if (NumBitsPerElt <= ShiftVal) {
Mask.append(NumElts, SM_SentinelZero);
return true;
}
// We can only decode 'whole byte' bit shifts as shuffles.
if ((ShiftVal % 8) != 0)
break;
uint64_t ByteShift = ShiftVal / 8;
Ops.push_back(N.getOperand(0));
// Clear mask to all zeros and insert the shifted byte indices.
Mask.append(NumSizeInBytes, SM_SentinelZero);
if (X86ISD::VSHLI == Opcode) {
for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
Mask[i + j] = i + j - ByteShift;
} else {
for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
Mask[i + j - ByteShift] = i + j;
}
return true;
}
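// Worked example (illustrative): a v2i64 VSHLI by 16 bits decodes to the
// byte shuffle {Z, Z, 0, 1, 2, 3, 4, 5, Z, Z, 8, 9, 10, 11, 12, 13} where Z
// is SM_SentinelZero.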
case X86ISD::VROTLI:
case X86ISD::VROTRI: {
// We can only decode 'whole byte' bit rotates as shuffles.
uint64_t RotateVal = N.getConstantOperandAPInt(1).urem(NumBitsPerElt);
if ((RotateVal % 8) != 0)
return false;
Ops.push_back(N.getOperand(0));
int Offset = RotateVal / 8;
Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset);
for (int i = 0; i != (int)NumElts; ++i) {
int BaseIdx = i * NumBytesPerElt;
for (int j = 0; j != (int)NumBytesPerElt; ++j) {
Mask.push_back(BaseIdx + ((Offset + j) % NumBytesPerElt));
}
}
return true;
}
case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
if (!Src.getSimpleValueType().isVector()) {
if (Src.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isNullConstant(Src.getOperand(1)) ||
Src.getOperand(0).getValueType().getScalarType() !=
VT.getScalarType())
return false;
Src = Src.getOperand(0);
}
Ops.push_back(Src);
Mask.append(NumElts, 0);
return true;
}
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::ANY_EXTEND_VECTOR_INREG: {
SDValue Src = N.getOperand(0);
EVT SrcVT = Src.getValueType();
// Extended source must be a simple vector.
if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
(SrcVT.getScalarSizeInBits() % 8) != 0)
return false;
bool IsAnyExtend =
(ISD::ANY_EXTEND == Opcode || ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
IsAnyExtend, Mask);
Ops.push_back(Src);
return true;
}
}
return false;
}
/// Removes unused/repeated shuffle source inputs and adjusts the shuffle mask.
static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask) {
int MaskWidth = Mask.size();
SmallVector<SDValue, 16> UsedInputs;
for (int i = 0, e = Inputs.size(); i < e; ++i) {
int lo = UsedInputs.size() * MaskWidth;
int hi = lo + MaskWidth;
// Strip UNDEF input usage.
if (Inputs[i].isUndef())
for (int &M : Mask)
if ((lo <= M) && (M < hi))
M = SM_SentinelUndef;
// Check for unused inputs.
if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
for (int &M : Mask)
if (lo <= M)
M -= MaskWidth;
continue;
}
// Check for repeated inputs.
bool IsRepeat = false;
for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) {
if (UsedInputs[j] != Inputs[i])
continue;
for (int &M : Mask)
if (lo <= M)
M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
IsRepeat = true;
break;
}
if (IsRepeat)
continue;
UsedInputs.push_back(Inputs[i]);
}
Inputs = UsedInputs;
}
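// For instance (illustrative): Inputs = {A, undef} with the width-4 mask
// {0, 4, 1, 5} resolves to Inputs = {A} and
// Mask = {0, SM_SentinelUndef, 1, SM_SentinelUndef}.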
/// Calls getTargetShuffleAndZeroables to resolve a target shuffle mask's inputs
/// and then sets the SM_SentinelUndef and SM_SentinelZero values.
/// Returns true if the target shuffle mask was decoded.
static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
APInt &KnownUndef, APInt &KnownZero,
const SelectionDAG &DAG, unsigned Depth,
bool ResolveKnownElts) {
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false; // Limit search depth.
EVT VT = Op.getValueType();
if (!VT.isSimple() || !VT.isVector())
return false;
if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) {
if (ResolveKnownElts)
resolveTargetShuffleFromZeroables(Mask, KnownUndef, KnownZero);
return true;
}
if (getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth,
ResolveKnownElts)) {
resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero);
return true;
}
return false;
}
static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
const SelectionDAG &DAG, unsigned Depth,
bool ResolveKnownElts) {
APInt KnownUndef, KnownZero;
return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef,
KnownZero, DAG, Depth, ResolveKnownElts);
}
static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
const SelectionDAG &DAG, unsigned Depth = 0,
bool ResolveKnownElts = true) {
EVT VT = Op.getValueType();
if (!VT.isSimple() || !VT.isVector())
return false;
unsigned NumElts = Op.getValueType().getVectorNumElements();
APInt DemandedElts = APInt::getAllOnes(NumElts);
return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, DAG, Depth,
ResolveKnownElts);
}
// Attempt to create a scalar/subvector broadcast from the base MemSDNode.
static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT,
EVT MemVT, MemSDNode *Mem, unsigned Offset,
SelectionDAG &DAG) {
assert((Opcode == X86ISD::VBROADCAST_LOAD ||
Opcode == X86ISD::SUBV_BROADCAST_LOAD) &&
"Unknown broadcast load type");
// Ensure this is a simple (non-atomic, non-volatile), temporal read memop.
if (!Mem || !Mem->readMem() || !Mem->isSimple() || Mem->isNonTemporal())
return SDValue();
SDValue Ptr =
DAG.getMemBasePlusOffset(Mem->getBasePtr(), TypeSize::Fixed(Offset), DL);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {Mem->getChain(), Ptr};
SDValue BcstLd = DAG.getMemIntrinsicNode(
Opcode, DL, Tys, Ops, MemVT,
DAG.getMachineFunction().getMachineMemOperand(
Mem->getMemOperand(), Offset, MemVT.getStoreSize()));
DAG.makeEquivalentMemoryOrdering(SDValue(Mem, 1), BcstLd.getValue(1));
return BcstLd;
}
/// Returns the scalar element that will make up the i'th
/// element of the result of the vector shuffle.
static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
SelectionDAG &DAG, unsigned Depth) {
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue(); // Limit search depth.
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
unsigned NumElems = VT.getVectorNumElements();
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (auto *SV = dyn_cast<ShuffleVectorSDNode>(Op)) {
int Elt = SV->getMaskElt(Index);
if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1);
return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
MVT ShufVT = VT.getSimpleVT();
MVT ShufSVT = ShufVT.getVectorElementType();
int NumElems = (int)ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
SmallVector<SDValue, 16> ShuffleOps;
if (!getTargetShuffleMask(Op.getNode(), ShufVT, true, ShuffleOps,
ShuffleMask))
return SDValue();
int Elt = ShuffleMask[Index];
if (Elt == SM_SentinelZero)
return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(Op), ShufSVT)
: DAG.getConstantFP(+0.0, SDLoc(Op), ShufSVT);
if (Elt == SM_SentinelUndef)
return DAG.getUNDEF(ShufSVT);
assert(0 <= Elt && Elt < (2 * NumElems) && "Shuffle index out of range");
SDValue Src = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1);
}
// Recurse into insert_subvector base/sub vector to find scalars.
if (Opcode == ISD::INSERT_SUBVECTOR) {
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t SubIdx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
if (SubIdx <= Index && Index < (SubIdx + NumSubElts))
return getShuffleScalarElt(Sub, Index - SubIdx, DAG, Depth + 1);
return getShuffleScalarElt(Vec, Index, DAG, Depth + 1);
}
// Recurse into concat_vectors sub vector to find scalars.
if (Opcode == ISD::CONCAT_VECTORS) {
EVT SubVT = Op.getOperand(0).getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
uint64_t SubIdx = Index / NumSubElts;
uint64_t SubElt = Index % NumSubElts;
return getShuffleScalarElt(Op.getOperand(SubIdx), SubElt, DAG, Depth + 1);
}
// Recurse into extract_subvector src vector to find scalars.
if (Opcode == ISD::EXTRACT_SUBVECTOR) {
SDValue Src = Op.getOperand(0);
uint64_t SrcIdx = Op.getConstantOperandVal(1);
return getShuffleScalarElt(Src, Index + SrcIdx, DAG, Depth + 1);
}
// We only peek through bitcasts of the same vector width.
if (Opcode == ISD::BITCAST) {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT.isVector() && SrcVT.getVectorNumElements() == NumElems)
return getShuffleScalarElt(Src, Index, DAG, Depth + 1);
return SDValue();
}
// Actual nodes that may contain scalar elements.
// For insert_vector_elt - either return the index matching scalar or recurse
// into the base vector.
if (Opcode == ISD::INSERT_VECTOR_ELT &&
isa<ConstantSDNode>(Op.getOperand(2))) {
if (Op.getConstantOperandAPInt(2) == Index)
return Op.getOperand(1);
return getShuffleScalarElt(Op.getOperand(0), Index, DAG, Depth + 1);
}
if (Opcode == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? Op.getOperand(0)
: DAG.getUNDEF(VT.getVectorElementType());
if (Opcode == ISD::BUILD_VECTOR)
return Op.getOperand(Index);
return SDValue();
}
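// For example (illustrative): asking for element 0 of
// vector_shuffle<4, 1, 6, 3> A, B recurses into element 0 of B.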
// Use PINSRB/PINSRW/PINSRD to create a build vector.
static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) ||
((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) &&
"Illegal vector insertion");
SDLoc dl(Op);
SDValue V;
bool First = true;
for (unsigned i = 0; i < NumElts; ++i) {
bool IsNonZero = NonZeroMask[i];
if (!IsNonZero)
continue;
// If the build vector contains zeros or our first insertion is not the
// first index, then insert into a zero vector to break any register
// dependency; else use SCALAR_TO_VECTOR.
if (First) {
First = false;
if (NumZero || 0 != i)
V = getZeroVector(VT, Subtarget, DAG, dl);
else {
assert(0 == i && "Expected insertion into zero-index");
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getBitcast(VT, V);
continue;
}
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i),
DAG.getIntPtrConstant(i, dl));
}
return V;
}
/// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (NumNonZero > 8 && !Subtarget.hasSSE41())
return SDValue();
// SSE4.1 - use PINSRB to insert each byte directly.
if (Subtarget.hasSSE41())
return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
Subtarget);
SDLoc dl(Op);
SDValue V;
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; i += 2) {
bool ThisIsNonZero = NonZeroMask[i];
bool NextIsNonZero = NonZeroMask[i + 1];
if (!ThisIsNonZero && !NextIsNonZero)
continue;
// FIXME: Investigate combining the first 4 bytes as a i32 instead.
SDValue Elt;
if (ThisIsNonZero) {
if (NumZero || NextIsNonZero)
Elt = DAG.getZExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
else
Elt = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
}
if (NextIsNonZero) {
SDValue NextElt = Op.getOperand(i + 1);
if (i == 0 && NumZero)
NextElt = DAG.getZExtOrTrunc(NextElt, dl, MVT::i32);
else
NextElt = DAG.getAnyExtOrTrunc(NextElt, dl, MVT::i32);
NextElt = DAG.getNode(ISD::SHL, dl, MVT::i32, NextElt,
DAG.getConstant(8, dl, MVT::i8));
if (ThisIsNonZero)
Elt = DAG.getNode(ISD::OR, dl, MVT::i32, NextElt, Elt);
else
Elt = NextElt;
}
// If our first insertion is not the first index or zeros are needed, then
// insert into zero vector. Otherwise, use SCALAR_TO_VECTOR (leaves high
// elements undefined).
if (!V) {
if (i != 0 || NumZero)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else {
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt);
V = DAG.getBitcast(MVT::v8i16, V);
continue;
}
}
Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Elt);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Elt,
DAG.getIntPtrConstant(i / 2, dl));
}
return DAG.getBitcast(MVT::v16i8, V);
}
/// Custom lower build_vector of v8i16.
static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (NumNonZero > 4 && !Subtarget.hasSSE41())
return SDValue();
// Use PINSRW to insert each byte directly.
return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
Subtarget);
}
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// If this is a splat of a pair of elements, use MOVDDUP (unless the target
// has XOP; in that case defer lowering to potentially use VPERMIL2PS).
// Because we're creating a less complicated build vector here, we may enable
// further folding of the MOVDDUP via shuffle transforms.
if (Subtarget.hasSSE3() && !Subtarget.hasXOP() &&
Op.getOperand(0) == Op.getOperand(2) &&
Op.getOperand(1) == Op.getOperand(3) &&
Op.getOperand(0) != Op.getOperand(1)) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
// Create a new build vector with the first 2 elements followed by undef
// padding, bitcast to v2f64, duplicate, and bitcast back.
SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
return DAG.getBitcast(VT, Dup);
}
// Find all zeroable elements.
std::bitset<4> Zeroable, Undefs;
for (int i = 0; i < 4; ++i) {
SDValue Elt = Op.getOperand(i);
Undefs[i] = Elt.isUndef();
Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
}
assert(Zeroable.size() - Zeroable.count() > 1 &&
"We expect at least two non-zero elements!");
// We only know how to deal with build_vector nodes where elements are either
// zeroable or extract_vector_elt with constant index.
SDValue FirstNonZero;
unsigned FirstNonZeroIdx;
for (unsigned i = 0; i < 4; ++i) {
if (Zeroable[i])
continue;
SDValue Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Elt.getOperand(1)))
return SDValue();
// Make sure that this node is extracting from a 128-bit vector.
MVT VT = Elt.getOperand(0).getSimpleValueType();
if (!VT.is128BitVector())
return SDValue();
if (!FirstNonZero.getNode()) {
FirstNonZero = Elt;
FirstNonZeroIdx = i;
}
}
assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
SDValue V1 = FirstNonZero.getOperand(0);
MVT VT = V1.getSimpleValueType();
// See if this build_vector can be lowered as a blend with zero.
SDValue Elt;
unsigned EltMaskIdx, EltIdx;
int Mask[4];
for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
if (Zeroable[EltIdx]) {
// The zero vector will be on the right hand side.
Mask[EltIdx] = EltIdx+4;
continue;
}
Elt = Op->getOperand(EltIdx);
// By construction, Elt is an EXTRACT_VECTOR_ELT with constant index.
EltMaskIdx = Elt.getConstantOperandVal(1);
if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
break;
Mask[EltIdx] = EltIdx;
}
if (EltIdx == 4) {
// Let the shuffle legalizer deal with blend operations.
SDValue VZeroOrUndef = (Zeroable == Undefs)
? DAG.getUNDEF(VT)
: getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
if (V1.getSimpleValueType() != VT)
V1 = DAG.getBitcast(VT, V1);
return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask);
}
// See if we can lower this build_vector to an INSERTPS.
if (!Subtarget.hasSSE41())
return SDValue();
SDValue V2 = Elt.getOperand(0);
if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
V1 = SDValue();
bool CanFold = true;
for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
if (Zeroable[i])
continue;
SDValue Current = Op->getOperand(i);
SDValue SrcVector = Current->getOperand(0);
if (!V1.getNode())
V1 = SrcVector;
CanFold = (SrcVector == V1) && (Current.getConstantOperandAPInt(1) == i);
}
if (!CanFold)
return SDValue();
assert(V1.getNode() && "Expected at least two non-zero elements!");
if (V1.getSimpleValueType() != MVT::v4f32)
V1 = DAG.getBitcast(MVT::v4f32, V1);
if (V2.getSimpleValueType() != MVT::v4f32)
V2 = DAG.getBitcast(MVT::v4f32, V2);
// Ok, we can emit an INSERTPS instruction.
unsigned ZMask = Zeroable.to_ulong();
unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
SDLoc DL(Op);
SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getIntPtrConstant(InsertPSMask, DL, true));
return DAG.getBitcast(VT, Result);
}
/// Return a vector logical shift node.
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
SelectionDAG &DAG, const TargetLowering &TLI,
const SDLoc &dl) {
assert(VT.is128BitVector() && "Unknown type for VShift");
MVT ShVT = MVT::v16i8;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8);
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
}
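// For example (illustrative): getVShift(true, MVT::v4i32, X, 32, ...) emits a
// VSHLDQ of the v16i8 bitcast of X by 4 bytes and bitcasts the result back to
// v4i32.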
static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
SelectionDAG &DAG) {
// Check if the scalar load can be widened into a vector load, and if
// the address is "base + cst", see if the cst can be "absorbed" into
// the shuffle mask.
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
SDValue Ptr = LD->getBasePtr();
if (!ISD::isNormalLoad(LD) || !LD->isSimple())
return SDValue();
EVT PVT = LD->getValueType(0);
if (PVT != MVT::i32 && PVT != MVT::f32)
return SDValue();
int FI = -1;
int64_t Offset = 0;
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
} else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);
Ptr = Ptr.getOperand(0);
} else {
return SDValue();
}
// FIXME: 256-bit vector instructions don't require a strict alignment;
// improve this code to support it better.
Align RequiredAlign(VT.getSizeInBits() / 8);
SDValue Chain = LD->getChain();
// Make sure the stack object alignment is at least 16 or 32.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MaybeAlign InferredAlign = DAG.InferPtrAlign(Ptr);
if (!InferredAlign || *InferredAlign < RequiredAlign) {
if (MFI.isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute
// the exact stack offset and reference FI + adjust offset instead.
// If someone *really* cares about this. That's the way to implement it.
return SDValue();
} else {
MFI.setObjectAlignment(FI, RequiredAlign);
}
}
// (Offset % 16 or 32) must be a multiple of 4. The address is then
// Ptr + (Offset & ~15).
if (Offset < 0)
return SDValue();
if ((Offset % RequiredAlign.value()) & 3)
return SDValue();
int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1);
if (StartOffset) {
SDLoc DL(Ptr);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
}
int EltNo = (Offset - StartOffset) >> 2;
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset));
SmallVector<int, 8> Mask(NumElems, EltNo);
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
}
return SDValue();
}
// Recurse to find a LoadSDNode source and the accumulated ByteOffset.
static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
if (ISD::isNON_EXTLoad(Elt.getNode())) {
auto *BaseLd = cast<LoadSDNode>(Elt);
if (!BaseLd->isSimple())
return false;
Ld = BaseLd;
ByteOffset = 0;
return true;
}
switch (Elt.getOpcode()) {
case ISD::BITCAST:
case ISD::TRUNCATE:
case ISD::SCALAR_TO_VECTOR:
return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset);
case ISD::SRL:
if (auto *AmtC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
uint64_t Amt = AmtC->getZExtValue();
if ((Amt % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
ByteOffset += Amt / 8;
return true;
}
}
break;
case ISD::EXTRACT_VECTOR_ELT:
if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
SDValue Src = Elt.getOperand(0);
unsigned SrcSizeInBits = Src.getScalarValueSizeInBits();
unsigned DstSizeInBits = Elt.getScalarValueSizeInBits();
if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 &&
findEltLoadSrc(Src, Ld, ByteOffset)) {
uint64_t Idx = IdxC->getZExtValue();
ByteOffset += Idx * (SrcSizeInBits / 8);
return true;
}
}
break;
}
return false;
}
/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
/// elements can be replaced by a single large load which has the same value as
/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
///
/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
bool IsAfterLegalize) {
if ((VT.getScalarSizeInBits() % 8) != 0)
return SDValue();
unsigned NumElems = Elts.size();
int LastLoadedElt = -1;
APInt LoadMask = APInt::getZero(NumElems);
APInt ZeroMask = APInt::getZero(NumElems);
APInt UndefMask = APInt::getZero(NumElems);
SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
// For each element in the initializer, see if we've found a load, zero or an
// undef.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = peekThroughBitcasts(Elts[i]);
if (!Elt.getNode())
return SDValue();
if (Elt.isUndef()) {
UndefMask.setBit(i);
continue;
}
if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode())) {
ZeroMask.setBit(i);
continue;
}
// Each loaded element must be the correct fractional portion of the
// requested vector load.
unsigned EltSizeInBits = Elt.getValueSizeInBits();
if ((NumElems * EltSizeInBits) != VT.getSizeInBits())
return SDValue();
if (!findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0)
return SDValue();
unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0);
if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits)
return SDValue();
LoadMask.setBit(i);
LastLoadedElt = i;
}
assert((ZeroMask.countPopulation() + UndefMask.countPopulation() +
LoadMask.countPopulation()) == NumElems &&
"Incomplete element masks");
// Handle Special Cases - all undef or undef/zero.
if (UndefMask.countPopulation() == NumElems)
return DAG.getUNDEF(VT);
if ((ZeroMask.countPopulation() + UndefMask.countPopulation()) == NumElems)
return VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
int FirstLoadedElt = LoadMask.countTrailingZeros();
SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
EVT EltBaseVT = EltBase.getValueType();
assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
"Register/Memory size mismatch");
LoadSDNode *LDBase = Loads[FirstLoadedElt];
assert(LDBase && "Did not find base load for merging consecutive loads");
unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
unsigned BaseSizeInBytes = BaseSizeInBits / 8;
int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt);
int LoadSizeInBits = NumLoadedElts * BaseSizeInBits;
assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
// TODO: Support offsetting the base load.
if (ByteOffsets[FirstLoadedElt] != 0)
return SDValue();
// Check to see if the element's load is consecutive to the base load
// or offset from a previous (already checked) load.
auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
LoadSDNode *Ld = Loads[EltIdx];
int64_t ByteOffset = ByteOffsets[EltIdx];
if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] &&
Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0);
}
return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes,
EltIdx - FirstLoadedElt);
};
// Consecutive loads can contain UNDEFs but not ZERO elements.
// Consecutive loads with UNDEF and ZERO elements require an
// additional shuffle stage to clear the ZERO elements.
bool IsConsecutiveLoad = true;
bool IsConsecutiveLoadWithZeros = true;
for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
if (LoadMask[i]) {
if (!CheckConsecutiveLoad(LDBase, i)) {
IsConsecutiveLoad = false;
IsConsecutiveLoadWithZeros = false;
break;
}
} else if (ZeroMask[i]) {
IsConsecutiveLoad = false;
}
}
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
auto MMOFlags = LDBase->getMemOperand()->getFlags();
assert(LDBase->isSimple() &&
"Cannot merge volatile or atomic loads.");
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getOriginalAlign(),
MMOFlags);
for (auto *LD : Loads)
if (LD)
DAG.makeEquivalentMemoryOrdering(LD, NewLd);
return NewLd;
};
// Check if the base load is entirely dereferenceable.
bool IsDereferenceable = LDBase->getPointerInfo().isDereferenceable(
VT.getSizeInBits() / 8, *DAG.getContext(), DAG.getDataLayout());
// LOAD - all consecutive load/undefs (must start/end with a load or be
// entirely dereferenceable). If we have found an entire vector of loads and
// undefs, then return a large load of the entire vector width starting at the
// base pointer. If the vector contains zeros, then attempt to shuffle those
// elements.
if (FirstLoadedElt == 0 &&
(NumLoadedElts == (int)NumElems || IsDereferenceable) &&
(IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
if (IsAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
// Don't create 256-bit non-temporal aligned loads without AVX2 as these
// will lower to regular temporal loads and use the cache.
if (LDBase->isNonTemporal() && LDBase->getAlign() >= Align(32) &&
VT.is256BitVector() && !Subtarget.hasInt256())
return SDValue();
if (NumElems == 1)
return DAG.getBitcast(VT, Elts[FirstLoadedElt]);
if (!ZeroMask)
return CreateLoad(VT, LDBase);
// IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
// vector and a zero vector to clear out the zero elements.
if (!IsAfterLegalize && VT.isVector()) {
unsigned NumMaskElts = VT.getVectorNumElements();
if ((NumMaskElts % NumElems) == 0) {
unsigned Scale = NumMaskElts / NumElems;
SmallVector<int, 4> ClearMask(NumMaskElts, -1);
for (unsigned i = 0; i < NumElems; ++i) {
if (UndefMask[i])
continue;
int Offset = ZeroMask[i] ? NumMaskElts : 0;
for (unsigned j = 0; j != Scale; ++j)
ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset;
}
SDValue V = CreateLoad(VT, LDBase);
SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
}
}
}
// If the upper half of a ymm/zmm load is undef then just load the lower half.
if (VT.is256BitVector() || VT.is512BitVector()) {
unsigned HalfNumElems = NumElems / 2;
if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnes()) {
EVT HalfVT =
EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
SDValue HalfLD =
EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL,
DAG, Subtarget, IsAfterLegalize);
if (HalfLD)
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
HalfLD, DAG.getIntPtrConstant(0, DL));
}
}
// VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
((LoadSizeInBits == 16 && Subtarget.hasFP16()) || LoadSizeInBits == 32 ||
LoadSizeInBits == 64) &&
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)
: MVT::getIntegerVT(LoadSizeInBits);
MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits);
// Allow v4f32 on SSE1 only targets.
// FIXME: Add more isel patterns so we can just use VT directly.
if (!Subtarget.hasSSE2() && VT == MVT::v4f32)
VecVT = MVT::v4f32;
if (TLI.isTypeLegal(VecVT)) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
SDValue ResNode = DAG.getMemIntrinsicNode(
X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(),
LDBase->getOriginalAlign(), MachineMemOperand::MOLoad);
for (auto *LD : Loads)
if (LD)
DAG.makeEquivalentMemoryOrdering(LD, ResNode);
return DAG.getBitcast(VT, ResNode);
}
}
// BROADCAST - match the smallest possible repetition pattern, load that
// scalar/subvector element and then broadcast to the entire vector.
if (ZeroMask.isZero() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
unsigned RepeatSize = SubElems * BaseSizeInBits;
unsigned ScalarSize = std::min(RepeatSize, 64u);
if (!Subtarget.hasAVX2() && ScalarSize < 32)
continue;
// Don't attempt a 1:N subvector broadcast - it should be caught by
// combineConcatVectorOps, else it will cause infinite loops.
if (RepeatSize > ScalarSize && SubElems == 1)
continue;
bool Match = true;
SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(EltBaseVT));
for (unsigned i = 0; i != NumElems && Match; ++i) {
if (!LoadMask[i])
continue;
SDValue Elt = peekThroughBitcasts(Elts[i]);
if (RepeatedLoads[i % SubElems].isUndef())
RepeatedLoads[i % SubElems] = Elt;
else
Match &= (RepeatedLoads[i % SubElems] == Elt);
}
// We must have loads at both ends of the repetition.
Match &= !RepeatedLoads.front().isUndef();
Match &= !RepeatedLoads.back().isUndef();
if (!Match)
continue;
EVT RepeatVT =
VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64))
? EVT::getIntegerVT(*DAG.getContext(), ScalarSize)
: EVT::getFloatingPointVT(ScalarSize);
if (RepeatSize > ScalarSize)
RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT,
RepeatSize / ScalarSize);
EVT BroadcastVT =
EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(),
VT.getSizeInBits() / ScalarSize);
if (TLI.isTypeLegal(BroadcastVT)) {
if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
RepeatVT, RepeatedLoads, DL, DAG, Subtarget, IsAfterLegalize)) {
SDValue Broadcast = RepeatLoad;
if (RepeatSize > ScalarSize) {
while (Broadcast.getValueSizeInBits() < VT.getSizeInBits())
Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL);
} else {
if (!Subtarget.hasAVX2() &&
!X86::mayFoldLoadIntoBroadcastFromMem(
RepeatLoad, RepeatVT.getScalarType().getSimpleVT(),
Subtarget,
/*AssumeSingleUse=*/true))
return SDValue();
Broadcast =
DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, RepeatLoad);
}
return DAG.getBitcast(VT, Broadcast);
}
}
}
}
return SDValue();
}
// Combine a vector op (shuffles etc.) that is equal to build_vector load1,
// load2, load3, load4, <0, 1, 2, 3> into a vector load if the load addresses
// are consecutive, non-overlapping, and in the right order.
static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
bool IsAfterLegalize) {
SmallVector<SDValue, 64> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
if (SDValue Elt = getShuffleScalarElt(Op, i, DAG, 0)) {
Elts.push_back(Elt);
continue;
}
return SDValue();
}
assert(Elts.size() == VT.getVectorNumElements());
return EltsFromConsecutiveLoads(VT, Elts, DL, DAG, Subtarget,
IsAfterLegalize);
}
static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
unsigned NumElm = SplatBitSize / ScalarSize;
SmallVector<Constant *, 32> ConstantVec;
for (unsigned i = 0; i < NumElm; i++) {
APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
if (ScalarSize == 16) {
Const = ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
} else if (ScalarSize == 32) {
Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
} else {
assert(ScalarSize == 64 && "Unsupported floating point scalar size");
Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
}
} else
Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
ConstantVec.push_back(Const);
}
return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
}
static bool isFoldableUseOfShuffle(SDNode *N) {
for (auto *U : N->uses()) {
unsigned Opc = U->getOpcode();
// VPERMV/VPERMV3 shuffles can never fold their index operands.
if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N)
return false;
if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N)
return false;
if (isTargetShuffle(Opc))
return true;
if (Opc == ISD::BITCAST) // Ignore bitcasts
return isFoldableUseOfShuffle(U);
if (N->hasOneUse()) {
// TODO: there may be some general way to know if an SDNode can
// be folded. We currently only know whether an MI is foldable.
if (Opc == X86ISD::VPDPBUSD && U->getOperand(2).getNode() != N)
return false;
return true;
}
}
return false;
}
/// Attempt to use the vbroadcast instruction to generate a splat value
/// from a splat BUILD_VECTOR which uses:
/// a. A single scalar load, or a constant.
/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
///
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// VBROADCAST requires AVX.
// TODO: Splats could be generated for non-AVX CPUs using SSE
// instructions, but there's less potential gain for only 128-bit vectors.
if (!Subtarget.hasAVX())
return SDValue();
MVT VT = BVOp->getSimpleValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDLoc dl(BVOp);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
// See if the build vector is a repeating sequence of scalars (inc. splat).
SDValue Ld;
BitVector UndefElements;
SmallVector<SDValue, 16> Sequence;
if (BVOp->getRepeatedSequence(Sequence, &UndefElements)) {
assert((NumElts % Sequence.size()) == 0 && "Sequence doesn't fit.");
if (Sequence.size() == 1)
Ld = Sequence[0];
}
// Attempt to use VBROADCASTM
// From this pattern:
// a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
// b. t1 = (build_vector t0 t0)
//
// Create (VBROADCASTM v2i1 X)
if (!Sequence.empty() && Subtarget.hasCDI()) {
// If not a splat, are the upper sequence values zeroable?
unsigned SeqLen = Sequence.size();
bool UpperZeroOrUndef =
SeqLen == 1 ||
llvm::all_of(ArrayRef(Sequence).drop_front(), [](SDValue V) {
return !V || V.isUndef() || isNullConstant(V);
});
SDValue Op0 = Sequence[0];
if (UpperZeroOrUndef && ((Op0.getOpcode() == ISD::BITCAST) ||
(Op0.getOpcode() == ISD::ZERO_EXTEND &&
Op0.getOperand(0).getOpcode() == ISD::BITCAST))) {
SDValue BOperand = Op0.getOpcode() == ISD::BITCAST
? Op0.getOperand(0)
: Op0.getOperand(0).getOperand(0);
MVT MaskVT = BOperand.getSimpleValueType();
MVT EltType = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q
(EltType == MVT::i32 && MaskVT == MVT::v16i1)) { // for broadcastmw2d
MVT BcstVT = MVT::getVectorVT(EltType, NumElts / SeqLen);
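// Without AVX512VL, VBROADCASTM is only legal on 512-bit vectors, so widen
// the broadcast type here and extract the original width again below.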
if (!VT.is512BitVector() && !Subtarget.hasVLX()) {
unsigned Scale = 512 / VT.getSizeInBits();
BcstVT = MVT::getVectorVT(EltType, Scale * (NumElts / SeqLen));
}
SDValue Bcst = DAG.getNode(X86ISD::VBROADCASTM, dl, BcstVT, BOperand);
if (BcstVT.getSizeInBits() != VT.getSizeInBits())
Bcst = extractSubVector(Bcst, 0, DAG, dl, VT.getSizeInBits());
return DAG.getBitcast(VT, Bcst);
}
}
}
unsigned NumUndefElts = UndefElements.count();
if (!Ld || (NumElts - NumUndefElts) <= 1) {
APInt SplatValue, Undef;
unsigned SplatBitSize;
bool HasUndef;
// Check if this is a repeated constant pattern suitable for broadcasting.
if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
SplatBitSize > VT.getScalarSizeInBits() &&
SplatBitSize < VT.getSizeInBits()) {
// Avoid replacing with broadcast when it's a use of a shuffle
// instruction to preserve the present custom lowering of shuffles.
if (isFoldableUseOfShuffle(BVOp))
return SDValue();
// Replace BUILD_VECTOR with a broadcast of the repeated constants.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
LLVMContext *Ctx = DAG.getContext();
MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
if (Subtarget.hasAVX()) {
if (SplatBitSize == 32 || SplatBitSize == 64 ||
(SplatBitSize < 32 && Subtarget.hasAVX2())) {
// Splatted value can fit in one INTEGER constant in constant pool.
// Load the constant and broadcast it.
MVT CVT = MVT::getIntegerVT(SplatBitSize);
Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
SDVTList Tys =
DAG.getVTList(MVT::getVectorVT(CVT, Repeat), MVT::Other);
SDValue Ops[] = {DAG.getEntryNode(), CP};
MachinePointerInfo MPI =
MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
SDValue Brdcst = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT, MPI, Alignment,
MachineMemOperand::MOLoad);
return DAG.getBitcast(VT, Brdcst);
}
if (SplatBitSize > 64) {
// Load the vector of constants and broadcast it.
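// For example (illustrative), a v8i32 whose constants repeat every 128 bits
// loads a v4i32 constant-pool vector and subvector-broadcasts it.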
Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
*Ctx);
SDValue VCP = DAG.getConstantPool(VecC, PVT);
unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm);
Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {DAG.getEntryNode(), VCP};
MachinePointerInfo MPI =
MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
return DAG.getMemIntrinsicNode(
X86ISD::SUBV_BROADCAST_LOAD, dl, Tys, Ops, VVT, MPI, Alignment,
MachineMemOperand::MOLoad);
}
}
}
// If we are moving a scalar into a vector (Ld must be set and all elements
// but 1 are undef) and that operation is not obviously supported by
// vmovd/vmovq/vmovss/vmovsd, then keep trying to form a broadcast.
// That's better than general shuffling and may eliminate a load to GPR and
// move from scalar to vector register.
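// Concretely, once we know exactly one element is defined, we bail out only
// when element 0 is defined and the scalar is 32/64 bits wide, i.e. when a
// plain vmovd/vmovq/vmovss/vmovsd suffices.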
if (!Ld || NumElts - NumUndefElts != 1)
return SDValue();
unsigned ScalarSize = Ld.getValueSizeInBits();
if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64)))
return SDValue();
}
bool ConstSplatVal =
(Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
// TODO: Handle broadcasts of non-constant sequences.
// Make sure that all of the users of a non-constant load are from the
// BUILD_VECTOR node.
// FIXME: Is the use count needed for non-constant, non-load case?
if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode()))
return SDValue();
unsigned ScalarSize = Ld.getValueSizeInBits();
bool IsGE256 = (VT.getSizeInBits() >= 256);
// When optimizing for size, generate up to 5 extra bytes for a broadcast
// instruction to save 8 or more bytes of constant pool data.
// TODO: If multiple splats are generated to load the same constant,
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
bool OptForSize = DAG.shouldOptForSize();
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
// On Sandybridge (no AVX2), it is still better to load a constant vector
// from the constant pool and not to broadcast it from a scalar.
// But override that restriction when optimizing for size.
// TODO: Check if splatting is recommended for other AVX-capable CPUs.
if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
// Splat f16, f32, i32, v4f64, v4i64 in all cases with AVX2.
// For size optimization, also splat v2f64 and v2i64, and for size opt
// with AVX2, also splat i8 and i16.
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
if (ScalarSize == 32 ||
(ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) ||
CVT == MVT::f16 ||
(OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
C = CF->getConstantFPValue();
assert(C && "Invalid constant type");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CP =
DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {DAG.getEntryNode(), CP};
MachinePointerInfo MPI =
MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
return DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT,
MPI, Alignment, MachineMemOperand::MOLoad);
}
}
// Handle AVX2 in-register broadcasts.
if (!IsLoad && Subtarget.hasInt256() &&
(ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The scalar source must be a normal load.
if (!IsLoad)
return SDValue();
// Make sure the non-chain result is only used by this build vector.
if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0))
return SDValue();
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
(Subtarget.hasVLX() && ScalarSize == 64)) {
auto *LN = cast<LoadSDNode>(Ld);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BCast =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
return BCast;
}
// The integer check is needed for the 64-bit into 128-bit case so it doesn't
// match double, since there is no vbroadcastsd xmm.
if (Subtarget.hasInt256() && Ld.getValueType().isInteger() &&
(ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) {
auto *LN = cast<LoadSDNode>(Ld);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BCast =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
return BCast;
}
if (ScalarSize == 16 && Subtarget.hasFP16() && IsGE256)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// Unsupported broadcast.
return SDValue();
}
/// For an EXTRACT_VECTOR_ELT with a constant index return the real
/// underlying vector and index.
///
/// Modifies \p ExtractedFromVec to the real vector and returns the real
/// index.
static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
SDValue ExtIdx) {
int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
return Idx;
// For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
// lowered this:
// (extract_vector_elt (v8f32 %1), Constant<6>)
// to:
// (extract_vector_elt (vector_shuffle<2,u,u,u>
// (extract_subvector (v8f32 %0), Constant<4>),
// undef)
// Constant<0>)
// In this case the vector is the extract_subvector expression and the index
// is 2, as specified by the shuffle.
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
SDValue ShuffleVec = SVOp->getOperand(0);
MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
assert(ShuffleVecVT.getVectorElementType() ==
ExtractedFromVec.getSimpleValueType().getVectorElementType());
int ShuffleIdx = SVOp->getMaskElt(Idx);
if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
ExtractedFromVec = ShuffleVec;
return ShuffleIdx;
}
return Idx;
}
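// Try to lower a BUILD_VECTOR whose operands are mostly EXTRACT_VECTOR_ELTs
// from at most two source vectors: build a vector_shuffle for those elements
// and insert the few remaining non-extract operands afterwards.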
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// Skip if insert_vec_elt is not supported.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
return SDValue();
SDLoc DL(Op);
unsigned NumElems = Op.getNumOperands();
SDValue VecIn1;
SDValue VecIn2;
SmallVector<unsigned, 4> InsertIndices;
SmallVector<int, 8> Mask(NumElems, -1);
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Opc = Op.getOperand(i).getOpcode();
if (Opc == ISD::UNDEF)
continue;
if (Opc != ISD::EXTRACT_VECTOR_ELT) {
// Quit if more than 1 element needs inserting.
if (InsertIndices.size() > 1)
return SDValue();
InsertIndices.push_back(i);
continue;
}
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
// Quit if extracted from vector of different type.
if (ExtractedFromVec.getValueType() != VT)
return SDValue();
if (!VecIn1.getNode())
VecIn1 = ExtractedFromVec;
else if (VecIn1 != ExtractedFromVec) {
if (!VecIn2.getNode())
VecIn2 = ExtractedFromVec;
else if (VecIn2 != ExtractedFromVec)
// Quit if more than 2 vectors to shuffle
return SDValue();
}
if (ExtractedFromVec == VecIn1)
Mask[i] = Idx;
else if (ExtractedFromVec == VecIn2)
Mask[i] = Idx + NumElems;
}
if (!VecIn1.getNode())
return SDValue();
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
for (unsigned Idx : InsertIndices)
NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
DAG.getIntPtrConstant(Idx, DL));
return NV;
}
// Lower BUILD_VECTOR operation for v8bf16, v16bf16 and v32bf16 types.
static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
MVT IVT = VT.changeVectorElementTypeToInteger();
SmallVector<SDValue, 16> NewOps;
for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
return DAG.getBitcast(VT, Res);
}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
SDLoc dl(Op);
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
ISD::isBuildVectorAllOnes(Op.getNode()))
return Op;
uint64_t Immediate = 0;
SmallVector<unsigned, 16> NonConstIdx;
bool IsSplat = true;
bool HasConstElts = false;
int SplatIdx = -1;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.isUndef())
continue;
if (auto *InC = dyn_cast<ConstantSDNode>(In)) {
Immediate |= (InC->getZExtValue() & 0x1) << idx;
HasConstElts = true;
} else {
NonConstIdx.push_back(idx);
}
if (SplatIdx < 0)
SplatIdx = idx;
else if (In != Op.getOperand(SplatIdx))
IsSplat = false;
}
// For a splat, use (select i1 splat_elt, all-ones, all-zeroes).
if (IsSplat) {
// The build_vector allows the scalar element to be larger than the vector
// element type. We need to mask it to use as a condition unless we know
// the upper bits are zero.
// FIXME: Use computeKnownBits instead of checking specific opcode?
SDValue Cond = Op.getOperand(SplatIdx);
assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!");
if (Cond.getOpcode() != ISD::SETCC)
Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond,
DAG.getConstant(1, dl, MVT::i8));
// Perform the select in the scalar domain so we can use cmov.
if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
SDValue Select = DAG.getSelect(dl, MVT::i32, Cond,
DAG.getAllOnesConstant(dl, MVT::i32),
DAG.getConstant(0, dl, MVT::i32));
Select = DAG.getBitcast(MVT::v32i1, Select);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Select, Select);
} else {
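// Select an all-ones/zero integer of at least 8 bits, bitcast it to a mask
// vector (widening v2i1/v4i1 to v8i1), then extract the original width.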
MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
SDValue Select = DAG.getSelect(dl, ImmVT, Cond,
DAG.getAllOnesConstant(dl, ImmVT),
DAG.getConstant(0, dl, ImmVT));
MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
Select = DAG.getBitcast(VecVT, Select);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select,
DAG.getIntPtrConstant(0, dl));
}
}
// Insert the non-constant elements one by one.
SDValue DstVec;
if (HasConstElts) {
if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32);
SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32);
ImmL = DAG.getBitcast(MVT::v32i1, ImmL);
ImmH = DAG.getBitcast(MVT::v32i1, ImmH);
DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH);
} else {
MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT);
MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
DstVec = DAG.getBitcast(VecVT, Imm);
DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec,
DAG.getIntPtrConstant(0, dl));
}
} else
DstVec = DAG.getUNDEF(VT);
for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
unsigned InsertIdx = NonConstIdx[i];
DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(InsertIdx),
DAG.getIntPtrConstant(InsertIdx, dl));
}
return DstVec;
}
LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) {
switch (Opcode) {
case X86ISD::PACKSS:
case X86ISD::PACKUS:
case X86ISD::FHADD:
case X86ISD::FHSUB:
case X86ISD::HADD:
case X86ISD::HSUB:
return true;
}
return false;
}
/// This is a helper function of LowerToHorizontalOp().
/// This function checks whether the input build_vector \p N implements a
/// 128-bit partial horizontal operation on a 256-bit vector, but that
/// operation may not match the layout of an x86 256-bit horizontal instruction.
/// In other words, if this returns true, then some extraction/insertion will
/// be required to produce a valid horizontal instruction.
///
/// Parameter \p Opcode defines the kind of horizontal operation to match.
/// For example, if \p Opcode is equal to ISD::ADD, then this function
/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
/// is equal to ISD::SUB, then this function checks if this is a horizontal
/// arithmetic sub.
///
/// This function only analyzes elements of \p N whose indices are
/// in range [BaseIdx, LastIdx).
///
/// TODO: This function was originally used to match both real and fake partial
/// horizontal operations, but the index-matching logic is incorrect for that.
/// See the corrected implementation in isHopBuildVector(). Can we reduce this
/// code because it is only used for partial h-op matching now?
static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
SelectionDAG &DAG,
unsigned BaseIdx, unsigned LastIdx,
SDValue &V0, SDValue &V1) {
EVT VT = N->getValueType(0);
assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops");
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
bool CanFold = true;
unsigned ExpectedVExtractIdx = BaseIdx;
unsigned NumElts = LastIdx - BaseIdx;
V0 = DAG.getUNDEF(VT);
V1 = DAG.getUNDEF(VT);
// Check if N implements a horizontal binop.
for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
SDValue Op = N->getOperand(i + BaseIdx);
// Skip UNDEFs.
if (Op->isUndef()) {
// Update the expected vector extract index.
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
ExpectedVExtractIdx += 2;
continue;
}
CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
if (!CanFold)
break;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Try to match the following pattern:
// (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0) == Op1.getOperand(0) &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
isa<ConstantSDNode>(Op1.getOperand(1)));
if (!CanFold)
break;
unsigned I0 = Op0.getConstantOperandVal(1);
unsigned I1 = Op1.getConstantOperandVal(1);
if (i * 2 < NumElts) {
if (V0.isUndef()) {
V0 = Op0.getOperand(0);
if (V0.getValueType() != VT)
return false;
}
} else {
if (V1.isUndef()) {
V1 = Op0.getOperand(0);
if (V1.getValueType() != VT)
return false;
}
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
}
SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
if (I0 == ExpectedVExtractIdx)
CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
else if (IsCommutable && I1 == ExpectedVExtractIdx) {
// Try to match the following dag sequence:
// (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
} else
CanFold = false;
ExpectedVExtractIdx += 2;
}
return CanFold;
}
/// Emit a sequence of two 128-bit horizontal add/sub followed by
/// a concat_vector.
///
/// This is a helper function of LowerToHorizontalOp().
/// This function expects two 256-bit vectors called V0 and V1.
/// At first, each vector is split into two separate 128-bit vectors.
/// Then, the resulting 128-bit vectors are used to implement two
/// horizontal binary operations.
///
/// The kind of horizontal binary operation is defined by \p X86Opcode.
///
/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed as input to
/// the two new horizontal binops.
/// When Mode is set, the first horizontal binop node takes as input the lower
/// 128 bits of V0 and the upper 128 bits of V0, and the second horizontal
/// binop node takes as input the lower 128 bits of V1 and the upper 128 bits
/// of V1.
/// Example:
/// HADD V0_LO, V0_HI
/// HADD V1_LO, V1_HI
///
/// Otherwise, the first horizontal binop node takes as input the lower 128
/// bits of V0 and the lower 128 bits of V1, and the second horizontal binop
/// node takes the upper 128 bits of V0 and the upper 128 bits of V1.
/// Example:
/// HADD V0_LO, V1_LO
/// HADD V0_HI, V1_HI
///
/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
/// the upper 128-bits of the result.
static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
const SDLoc &DL, SelectionDAG &DAG,
unsigned X86Opcode, bool Mode,
bool isUndefLO, bool isUndefHI) {
MVT VT = V0.getSimpleValueType();
assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&
"Invalid nodes in input!");
unsigned NumElts = VT.getVectorNumElements();
SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
MVT NewVT = V0_LO.getSimpleValueType();
SDValue LO = DAG.getUNDEF(NewVT);
SDValue HI = DAG.getUNDEF(NewVT);
if (Mode) {
// Don't emit a horizontal binop if the result is expected to be UNDEF.
if (!isUndefLO && !V0->isUndef())
LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
if (!isUndefHI && !V1->isUndef())
HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
} else {
// Don't emit a horizontal binop if the result is expected to be UNDEF.
if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}
/// Returns true iff \p BV builds a vector with a result equivalent to that of
/// an ADDSUB/SUBADD operation.
/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1
/// (SUBADD = Opnd0 -+ Opnd1) operation are written to the parameters
/// \p Opnd0 and \p Opnd1.
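///
/// For example (illustrative), a v4f32 build_vector
/// <a0-b0, a1+b1, a2-b2, a3+b3>, where element i extracts lane i of A and B,
/// matches ADDSUB(A, B).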
static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1,
unsigned &NumExtracts,
bool &IsSubAdd) {
MVT VT = BV->getSimpleValueType(0);
if (!Subtarget.hasSSE3() || !VT.isFloatingPoint())
return false;
unsigned NumElts = VT.getVectorNumElements();
SDValue InVec0 = DAG.getUNDEF(VT);
SDValue InVec1 = DAG.getUNDEF(VT);
NumExtracts = 0;
// Odd-numbered elements in the input build vector are obtained from
// adding/subtracting two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
// subtracting/adding two integer/float elements.
unsigned Opc[2] = {0, 0};
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Op = BV->getOperand(i);
// Skip 'undef' values.
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::UNDEF)
continue;
// Early exit if we found an unexpected opcode.
if (Opcode != ISD::FADD && Opcode != ISD::FSUB)
return false;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Try to match the following pattern:
// (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
// Early exit if we cannot match that sequence.
if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op0.getOperand(1)) ||
Op0.getOperand(1) != Op1.getOperand(1))
return false;
unsigned I0 = Op0.getConstantOperandVal(1);
if (I0 != i)
return false;
// We found a valid add/sub node; make sure it's the same opcode as previous
// elements for this parity.
if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode)
return false;
Opc[i % 2] = Opcode;
// Update InVec0 and InVec1.
if (InVec0.isUndef()) {
InVec0 = Op0.getOperand(0);
if (InVec0.getSimpleValueType() != VT)
return false;
}
if (InVec1.isUndef()) {
InVec1 = Op1.getOperand(0);
if (InVec1.getSimpleValueType() != VT)
return false;
}
// Make sure that the input operands of each add/sub node always
// come from the same pair of vectors.
if (InVec0 != Op0.getOperand(0)) {
if (Opcode == ISD::FSUB)
return false;
// FADD is commutable. Try to commute the operands
// and then test again.
std::swap(Op0, Op1);
if (InVec0 != Op0.getOperand(0))
return false;
}
if (InVec1 != Op1.getOperand(0))
return false;
// Increment the number of extractions done.
++NumExtracts;
}
// Ensure we have found an opcode for both parities and that they are
// different. Don't try to fold this build_vector into an ADDSUB/SUBADD if the
// inputs are undef.
if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
InVec0.isUndef() || InVec1.isUndef())
return false;
IsSubAdd = Opc[0] == ISD::FADD;
Opnd0 = InVec0;
Opnd1 = InVec1;
return true;
}
/// Returns true if it is possible to fold MUL and an idiom that has already
/// been recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
/// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the
/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
/// before replacement of such SDNode with ADDSUB operation. Thus the number
/// of \p Opnd0 uses is expected to be equal to 2.
/// For example, this function may be called for the following IR:
/// %AB = fmul fast <2 x double> %A, %B
/// %Sub = fsub fast <2 x double> %AB, %C
/// %Add = fadd fast <2 x double> %AB, %C
/// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
/// <2 x i32> <i32 0, i32 3>
/// There is a def for %Addsub here, which potentially can be replaced by
/// X86ISD::ADDSUB operation:
/// %Addsub = X86ISD::ADDSUB %AB, %C
/// and such ADDSUB can further be replaced with FMADDSUB:
/// %Addsub = FMADDSUB %A, %B, %C.
///
/// The main reason why this method is called before the replacement of the
/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
unsigned ExpectedUses) {
if (Opnd0.getOpcode() != ISD::FMUL ||
!Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())
return false;
// FIXME: These checks must match the similar ones in
// DAGCombiner::visitFADDForFMACombine. It would be good to have one
// function that would answer if it is Ok to fuse MUL + ADD to FMADD
// or MUL + ADDSUB to FMADDSUB.
const TargetOptions &Options = DAG.getTarget().Options;
bool AllowFusion =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
if (!AllowFusion)
return false;
Opnd2 = Opnd1;
Opnd1 = Opnd0.getOperand(1);
Opnd0 = Opnd0.getOperand(0);
return true;
}
/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' or
/// 'fsubadd' operation accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB or
/// X86ISD::FMSUBADD node.
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
unsigned NumExtracts;
bool IsSubAdd;
if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
IsSubAdd))
return SDValue();
MVT VT = BV->getSimpleValueType(0);
SDLoc DL(BV);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) {
unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
}
// We only support ADDSUB.
if (IsSubAdd)
return SDValue();
// There are no known X86 targets with 512-bit ADDSUB instructions!
// Convert to blend(fsub,fadd).
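// The shuffle mask takes even result lanes from the FSUB node and odd result
// lanes from the FADD node (mask indices >= E select from the second operand).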
if (VT.is512BitVector()) {
SmallVector<int> Mask;
for (int I = 0, E = VT.getVectorNumElements(); I != E; I += 2) {
Mask.push_back(I);
Mask.push_back(I + E + 1);
}
SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1);
return DAG.getVectorShuffle(VT, DL, Sub, Add, Mask);
}
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
unsigned &HOpcode, SDValue &V0, SDValue &V1) {
// Initialize outputs to known values.
MVT VT = BV->getSimpleValueType(0);
HOpcode = ISD::DELETED_NODE;
V0 = DAG.getUNDEF(VT);
V1 = DAG.getUNDEF(VT);
// x86 256-bit horizontal ops are defined in a non-obvious way. Each 128-bit
// half of the result is calculated independently from the 128-bit halves of
// the inputs, so that makes the index-checking logic below more complicated.
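// For example, v8i32 HADD(A,B) computes result elements 0-1 from A[0..3],
// 2-3 from B[0..3], 4-5 from A[4..7] and 6-7 from B[4..7].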
unsigned NumElts = VT.getVectorNumElements();
unsigned GenericOpcode = ISD::DELETED_NODE;
unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1;
unsigned NumEltsIn128Bits = NumElts / Num128BitChunks;
unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
for (unsigned i = 0; i != Num128BitChunks; ++i) {
for (unsigned j = 0; j != NumEltsIn128Bits; ++j) {
// Ignore undef elements.
SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j);
if (Op.isUndef())
continue;
// If there's an opcode mismatch, we're done.
if (HOpcode != ISD::DELETED_NODE && Op.getOpcode() != GenericOpcode)
return false;
// Initialize horizontal opcode.
if (HOpcode == ISD::DELETED_NODE) {
GenericOpcode = Op.getOpcode();
switch (GenericOpcode) {
case ISD::ADD: HOpcode = X86ISD::HADD; break;
case ISD::SUB: HOpcode = X86ISD::HSUB; break;
case ISD::FADD: HOpcode = X86ISD::FHADD; break;
case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
default: return false;
}
}
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op0.getOperand(0) != Op1.getOperand(0) ||
!isa<ConstantSDNode>(Op0.getOperand(1)) ||
!isa<ConstantSDNode>(Op1.getOperand(1)) || !Op.hasOneUse())
return false;
// The source vector is chosen based on which 64-bit half of the
// destination vector is being calculated.
if (j < NumEltsIn64Bits) {
if (V0.isUndef())
V0 = Op0.getOperand(0);
} else {
if (V1.isUndef())
V1 = Op0.getOperand(0);
}
SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
if (SourceVec != Op0.getOperand(0))
return false;
// op (extract_vector_elt A, I), (extract_vector_elt A, I+1)
unsigned ExtIndex0 = Op0.getConstantOperandVal(1);
unsigned ExtIndex1 = Op1.getConstantOperandVal(1);
unsigned ExpectedIndex = i * NumEltsIn128Bits +
(j % NumEltsIn64Bits) * 2;
if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
continue;
// If this is not a commutative op, this does not match.
if (GenericOpcode != ISD::ADD && GenericOpcode != ISD::FADD)
return false;
// Addition is commutative, so try swapping the extract indexes.
// op (extract_vector_elt A, I+1), (extract_vector_elt A, I)
if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1)
continue;
// Extract indexes do not match horizontal requirement.
return false;
}
}
// We matched. Opcode and operands are returned by reference as arguments.
return true;
}
static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
SelectionDAG &DAG, unsigned HOpcode,
SDValue V0, SDValue V1) {
// If either input vector is not the same size as the build vector,
// extract/insert the low bits to the correct size.
// This is free (examples: zmm --> xmm, xmm --> ymm).
MVT VT = BV->getSimpleValueType(0);
unsigned Width = VT.getSizeInBits();
if (V0.getValueSizeInBits() > Width)
V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width);
else if (V0.getValueSizeInBits() < Width)
V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width);
if (V1.getValueSizeInBits() > Width)
V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width);
else if (V1.getValueSizeInBits() < Width)
V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
unsigned NumElts = VT.getVectorNumElements();
APInt DemandedElts = APInt::getAllOnes(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
if (BV->getOperand(i).isUndef())
DemandedElts.clearBit(i);
// If we don't need the upper xmm, then perform as an xmm hop.
unsigned HalfNumElts = NumElts / 2;
if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {
MVT HalfVT = VT.getHalfNumVectorElementsVT();
V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128);
V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128);
SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1);
return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256);
}
return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
}
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// We need at least 2 non-undef elements to make this worthwhile by default.
unsigned NumNonUndefs =
count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
if (NumNonUndefs < 2)
return SDValue();
// There are 4 sets of horizontal math operations distinguished by type:
// int/FP at 128-bit/256-bit. Each type was introduced with a different
// subtarget feature. Try to match those "native" patterns first.
MVT VT = BV->getSimpleValueType(0);
if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
unsigned HOpcode;
SDValue V0, V1;
if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
}
// Try harder to match 256-bit ops by using extract/concat.
if (!Subtarget.hasAVX() || !VT.is256BitVector())
return SDValue();
// Count the number of UNDEF operands in the input build_vector.
unsigned NumElts = VT.getVectorNumElements();
unsigned Half = NumElts / 2;
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
for (unsigned i = 0, e = Half; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsLO++;
for (unsigned i = Half, e = NumElts; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsHI++;
SDLoc DL(BV);
SDValue InVec0, InVec1;
if (VT == MVT::v8i32 || VT == MVT::v16i16) {
SDValue InVec2, InVec3;
unsigned X86Opcode;
bool CanFold = true;
if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2,
InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HADD;
else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0,
InVec1) &&
isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2,
InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HSUB;
else
CanFold = false;
if (CanFold) {
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
// Convert this build_vector into a pair of horizontal binops followed by
// a concat vector. We must adjust the outputs from the partial horizontal
// matching calls above to account for undefined vector halves.
SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
isUndefHI);
}
}
if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
VT == MVT::v16i16) {
unsigned X86Opcode;
if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HADD;
else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0,
InVec1))
X86Opcode = X86ISD::HSUB;
else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0,
InVec1))
X86Opcode = X86ISD::FHADD;
else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0,
InVec1))
X86Opcode = X86ISD::FHSUB;
else
return SDValue();
// Don't try to expand this build_vector into a pair of horizontal add/sub
// if we can simply emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
// Convert this build_vector into two horizontal add/sub followed by
// a concat vector.
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
isUndefLO, isUndefHI);
}
return SDValue();
}
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG);
/// If a BUILD_VECTOR's source elements all apply the same bit operation and
/// one of their operands is constant, lower to a pair of BUILD_VECTOR and
/// just apply the bit to the vectors.
/// NOTE: It's not in our interest to start making a general-purpose vectorizer
/// from this, but enough scalar bit operations are created by the later
/// legalization + scalarization stages to need basic support.
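///
/// For example, (build_vector (and a, 1), (and b, 2)) becomes
/// (and (build_vector a, b), (build_vector 1, 2)).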
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Check that all elements have the same opcode.
// TODO: Should we allow UNDEFS and if so how many?
unsigned Opcode = Op->getOperand(0).getOpcode();
for (unsigned i = 1; i < NumElems; ++i)
if (Opcode != Op->getOperand(i).getOpcode())
return SDValue();
// TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
bool IsShift = false;
switch (Opcode) {
default:
return SDValue();
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
IsShift = true;
break;
case ISD::AND:
case ISD::XOR:
case ISD::OR:
// Don't do this if the buildvector is a splat - we'd replace one
// constant with an entire vector.
if (Op->getSplatValue())
return SDValue();
if (!TLI.isOperationLegalOrPromote(Opcode, VT))
return SDValue();
break;
}
SmallVector<SDValue, 4> LHSElts, RHSElts;
for (SDValue Elt : Op->ops()) {
SDValue LHS = Elt.getOperand(0);
SDValue RHS = Elt.getOperand(1);
// We expect the canonicalized RHS operand to be the constant.
if (!isa<ConstantSDNode>(RHS))
return SDValue();
// Extend shift amounts.
if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
if (!IsShift)
return SDValue();
RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
}
LHSElts.push_back(LHS);
RHSElts.push_back(RHS);
}
// Limit to shifts by uniform immediates.
// TODO: Only accept vXi8/vXi64 special cases?
// TODO: Permit non-uniform XOP/AVX2/MULLO cases?
if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
return SDValue();
SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);
if (!IsShift)
return Res;
// Immediately lower the shift to ensure the constant build vector doesn't
// get converted to a constant pool before the shift is lowered.
return LowerShift(Res, Subtarget, DAG);
}
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
/// functionality to do this, so it's all zeros, all ones, or some derivation
/// that is cheap to calculate.
static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
// Vectors containing all zeros can be matched by pxor and xorps.
if (ISD::isBuildVectorAllZeros(Op.getNode()))
return Op;
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
return getOnesVector(VT, DAG, DL);
}
return SDValue();
}
/// Look for opportunities to create a VPERMV/VPERMILPV/PSHUFB variable permute
/// from a vector of source values and a vector of extraction indices.
/// The vectors might be manipulated to match the type of the permute op.
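/// For example, a v16i8 permute with SSSE3 lowers to (PSHUFB SrcVec,
/// IndicesVec); wider element types are bitcast/scaled to a supported shuffle
/// type first.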
static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT ShuffleVT = VT;
EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
unsigned NumElts = VT.getVectorNumElements();
unsigned SizeInBits = VT.getSizeInBits();
// Adjust IndicesVec to match VT size.
assert(IndicesVec.getValueType().getVectorNumElements() >= NumElts &&
"Illegal variable permute mask size");
if (IndicesVec.getValueType().getVectorNumElements() > NumElts) {
// Narrow/widen the indices vector to the correct size.
if (IndicesVec.getValueSizeInBits() > SizeInBits)
IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
NumElts * VT.getScalarSizeInBits());
else if (IndicesVec.getValueSizeInBits() < SizeInBits)
IndicesVec = widenSubVector(IndicesVec, false, Subtarget, DAG,
SDLoc(IndicesVec), SizeInBits);
// Zero-extend the index elements within the vector.
if (IndicesVec.getValueType().getVectorNumElements() > NumElts)
IndicesVec = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(IndicesVec),
IndicesVT, IndicesVec);
}
IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
// Handle a SrcVec whose size doesn't match the VT size.
if (SrcVec.getValueSizeInBits() != SizeInBits) {
if ((SrcVec.getValueSizeInBits() % SizeInBits) == 0) {
// Handle larger SrcVec by treating it as a larger permute.
unsigned Scale = SrcVec.getValueSizeInBits() / SizeInBits;
VT = MVT::getVectorVT(VT.getScalarType(), Scale * NumElts);
IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
Subtarget, DAG, SDLoc(IndicesVec));
SDValue NewSrcVec =
createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
if (NewSrcVec)
return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
return SDValue();
} else if (SrcVec.getValueSizeInBits() < SizeInBits) {
// Widen smaller SrcVec to match VT.
SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
} else
return SDValue();
}
auto ScaleIndices = [&DAG](SDValue Idx, uint64_t Scale) {
assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale");
EVT SrcVT = Idx.getValueType();
unsigned NumDstBits = SrcVT.getScalarSizeInBits() / Scale;
uint64_t IndexScale = 0;
uint64_t IndexOffset = 0;
// If we're scaling a smaller permute op, then we need to repeat the
// indices, scaling and offsetting them as well.
// e.g. v4i32 -> v16i8 (Scale = 4)
// IndexScale = v4i32 Splat(4 << 24 | 4 << 16 | 4 << 8 | 4)
// IndexOffset = v4i32 Splat(3 << 24 | 2 << 16 | 1 << 8 | 0)
for (uint64_t i = 0; i != Scale; ++i) {
IndexScale |= Scale << (i * NumDstBits);
IndexOffset |= i << (i * NumDstBits);
}
Idx = DAG.getNode(ISD::MUL, SDLoc(Idx), SrcVT, Idx,
DAG.getConstant(IndexScale, SDLoc(Idx), SrcVT));
Idx = DAG.getNode(ISD::ADD, SDLoc(Idx), SrcVT, Idx,
DAG.getConstant(IndexOffset, SDLoc(Idx), SrcVT));
return Idx;
};
unsigned Opcode = 0;
switch (VT.SimpleTy) {
default:
break;
case MVT::v16i8:
if (Subtarget.hasSSSE3())
Opcode = X86ISD::PSHUFB;
break;
case MVT::v8i16:
if (Subtarget.hasVLX() && Subtarget.hasBWI())
Opcode = X86ISD::VPERMV;
else if (Subtarget.hasSSSE3()) {
Opcode = X86ISD::PSHUFB;
ShuffleVT = MVT::v16i8;
}
break;
case MVT::v4f32:
case MVT::v4i32:
if (Subtarget.hasAVX()) {
Opcode = X86ISD::VPERMILPV;
ShuffleVT = MVT::v4f32;
} else if (Subtarget.hasSSSE3()) {
Opcode = X86ISD::PSHUFB;
ShuffleVT = MVT::v16i8;
}
break;
case MVT::v2f64:
case MVT::v2i64:
if (Subtarget.hasAVX()) {
// VPERMILPD selects using bit#1 of the index vector, so scale IndicesVec.
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
Opcode = X86ISD::VPERMILPV;
ShuffleVT = MVT::v2f64;
} else if (Subtarget.hasSSE41()) {
// SSE41 can compare v2i64 - select between indices 0 and 1.
return DAG.getSelectCC(
DL, IndicesVec,
getZeroVector(IndicesVT.getSimpleVT(), Subtarget, DAG, DL),
DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}),
DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {1, 1}),
ISD::CondCode::SETEQ);
}
break;
case MVT::v32i8:
if (Subtarget.hasVLX() && Subtarget.hasVBMI())
Opcode = X86ISD::VPERMV;
else if (Subtarget.hasXOP()) {
SDValue LoSrc = extract128BitVector(SrcVec, 0, DAG, DL);
SDValue HiSrc = extract128BitVector(SrcVec, 16, DAG, DL);
SDValue LoIdx = extract128BitVector(IndicesVec, 0, DAG, DL);
SDValue HiIdx = extract128BitVector(IndicesVec, 16, DAG, DL);
return DAG.getNode(
ISD::CONCAT_VECTORS, DL, VT,
DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, LoIdx),
DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, HiIdx));
} else if (Subtarget.hasAVX()) {
SDValue Lo = extract128BitVector(SrcVec, 0, DAG, DL);
SDValue Hi = extract128BitVector(SrcVec, 16, DAG, DL);
SDValue LoLo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Lo);
SDValue HiHi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Hi, Hi);
auto PSHUFBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
// Permute Lo and Hi and then select based on index range.
// This works as PSHUFB uses bits[3:0] to permute elements and we don't
// care about bit[7] as it's just an index vector.
SDValue Idx = Ops[2];
EVT VT = Idx.getValueType();
return DAG.getSelectCC(DL, Idx, DAG.getConstant(15, DL, VT),
DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[1], Idx),
DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[0], Idx),
ISD::CondCode::SETGT);
};
SDValue Ops[] = {LoLo, HiHi, IndicesVec};
return SplitOpsAndApply(DAG, Subtarget, DL, MVT::v32i8, Ops,
PSHUFBBuilder);
}
break;
case MVT::v16i16:
if (Subtarget.hasVLX() && Subtarget.hasBWI())
Opcode = X86ISD::VPERMV;
else if (Subtarget.hasAVX()) {
// Scale to v32i8 and perform as v32i8.
IndicesVec = ScaleIndices(IndicesVec, 2);
return DAG.getBitcast(
VT, createVariablePermute(
MVT::v32i8, DAG.getBitcast(MVT::v32i8, SrcVec),
DAG.getBitcast(MVT::v32i8, IndicesVec), DL, DAG, Subtarget));
}
break;
case MVT::v8f32:
case MVT::v8i32:
if (Subtarget.hasAVX2())
Opcode = X86ISD::VPERMV;
else if (Subtarget.hasAVX()) {
SrcVec = DAG.getBitcast(MVT::v8f32, SrcVec);
SDValue LoLo = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
{0, 1, 2, 3, 0, 1, 2, 3});
SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
{4, 5, 6, 7, 4, 5, 6, 7});
if (Subtarget.hasXOP())
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi,
IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
// Permute Lo and Hi and then select based on index range.
// This works as VPERMILPS only uses index bits[0:1] to permute elements.
SDValue Res = DAG.getSelectCC(
DL, IndicesVec, DAG.getConstant(3, DL, MVT::v8i32),
DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, HiHi, IndicesVec),
DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, LoLo, IndicesVec),
ISD::CondCode::SETGT);
return DAG.getBitcast(VT, Res);
}
break;
case MVT::v4i64:
case MVT::v4f64:
if (Subtarget.hasAVX512()) {
if (!Subtarget.hasVLX()) {
MVT WidenSrcVT = MVT::getVectorVT(VT.getScalarType(), 8);
SrcVec = widenSubVector(WidenSrcVT, SrcVec, false, Subtarget, DAG,
SDLoc(SrcVec));
IndicesVec = widenSubVector(MVT::v8i64, IndicesVec, false, Subtarget,
DAG, SDLoc(IndicesVec));
SDValue Res = createVariablePermute(WidenSrcVT, SrcVec, IndicesVec, DL,
DAG, Subtarget);
return extract256BitVector(Res, 0, DAG, DL);
}
Opcode = X86ISD::VPERMV;
} else if (Subtarget.hasAVX()) {
SrcVec = DAG.getBitcast(MVT::v4f64, SrcVec);
SDValue LoLo =
DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {0, 1, 0, 1});
SDValue HiHi =
DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {2, 3, 2, 3});
// VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec.
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
if (Subtarget.hasXOP())
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi,
IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
// Permute Lo and Hi and then select based on index range.
// This works as VPERMILPD only uses index bit[1] to permute elements.
SDValue Res = DAG.getSelectCC(
DL, IndicesVec, DAG.getConstant(2, DL, MVT::v4i64),
DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, HiHi, IndicesVec),
DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, LoLo, IndicesVec),
ISD::CondCode::SETGT);
return DAG.getBitcast(VT, Res);
}
break;
case MVT::v64i8:
if (Subtarget.hasVBMI())
Opcode = X86ISD::VPERMV;
break;
case MVT::v32i16:
if (Subtarget.hasBWI())
Opcode = X86ISD::VPERMV;
break;
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8f64:
case MVT::v8i64:
if (Subtarget.hasAVX512())
Opcode = X86ISD::VPERMV;
break;
}
if (!Opcode)
return SDValue();
assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) &&
(VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 &&
"Illegal variable permute shuffle type");
uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits();
if (Scale > 1)
IndicesVec = ScaleIndices(IndicesVec, Scale);
EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger();
IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec);
SrcVec = DAG.getBitcast(ShuffleVT, SrcVec);
SDValue Res = Opcode == X86ISD::VPERMV
? DAG.getNode(Opcode, DL, ShuffleVT, IndicesVec, SrcVec)
: DAG.getNode(Opcode, DL, ShuffleVT, SrcVec, IndicesVec);
return DAG.getBitcast(VT, Res);
}
// Tries to lower a BUILD_VECTOR composed of extract-extract chains that can be
// reasoned to be a permutation of a vector by indices in a non-constant vector.
// (build_vector (extract_elt V, (extract_elt I, 0)),
// (extract_elt V, (extract_elt I, 1)),
// ...
// ->
// (vpermv I, V)
//
// TODO: Handle undefs
// TODO: Utilize pshufb and zero mask blending to support more efficient
// construction of vectors with constant-0 elements.
static SDValue
LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue SrcVec, IndicesVec;
// Check for a match of the permute source vector and permute index elements.
// This is done by checking that the i-th build_vector operand is of the form:
// (extract_elt SrcVec, (extract_elt IndicesVec, i)).
for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
SDValue Op = V.getOperand(Idx);
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
// If this is the first extract encountered in V, set the source vector,
// otherwise verify the extract is from the previously defined source
// vector.
if (!SrcVec)
SrcVec = Op.getOperand(0);
else if (SrcVec != Op.getOperand(0))
return SDValue();
SDValue ExtractedIndex = Op->getOperand(1);
// Peek through extends.
if (ExtractedIndex.getOpcode() == ISD::ZERO_EXTEND ||
ExtractedIndex.getOpcode() == ISD::SIGN_EXTEND)
ExtractedIndex = ExtractedIndex.getOperand(0);
if (ExtractedIndex.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
// If this is the first extract from the index vector candidate, set the
// indices vector, otherwise verify the extract is from the previously
// defined indices vector.
if (!IndicesVec)
IndicesVec = ExtractedIndex.getOperand(0);
else if (IndicesVec != ExtractedIndex.getOperand(0))
return SDValue();
auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1));
if (!PermIdx || PermIdx->getAPIntValue() != Idx)
return SDValue();
}
SDLoc DL(V);
MVT VT = V.getSimpleValueType();
return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
}
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
MVT OpEltVT = Op.getOperand(0).getSimpleValueType();
unsigned NumElems = Op.getNumOperands();
// Generate vectors for predicate vectors.
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);
if (VT.getVectorElementType() == MVT::bf16 && Subtarget.hasBF16())
return LowerBUILD_VECTORvXbf16(Op, DAG, Subtarget);
if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
return VectorConstant;
unsigned EVTBits = EltVT.getSizeInBits();
APInt UndefMask = APInt::getZero(NumElems);
APInt FrozenUndefMask = APInt::getZero(NumElems);
APInt ZeroMask = APInt::getZero(NumElems);
APInt NonZeroMask = APInt::getZero(NumElems);
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Op.getOperand(i);
if (Elt.isUndef()) {
UndefMask.setBit(i);
continue;
}
if (Elt.getOpcode() == ISD::FREEZE && Elt.getOperand(0).isUndef()) {
FrozenUndefMask.setBit(i);
continue;
}
Values.insert(Elt);
if (!isa<ConstantSDNode>(Elt) && !isa<ConstantFPSDNode>(Elt)) {
IsAllConstants = false;
NumConstants--;
}
if (X86::isZeroNode(Elt)) {
ZeroMask.setBit(i);
} else {
NonZeroMask.setBit(i);
}
}
// All undef vector. Return an UNDEF.
if (UndefMask.isAllOnes())
return DAG.getUNDEF(VT);
// If we have multiple FREEZE-UNDEF operands, we are likely going to end up
// lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
// our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
// and blend the FREEZE-UNDEF operands back in.
// FIXME: is this worthwhile even for a single FREEZE-UNDEF operand?
if (unsigned NumFrozenUndefElts = FrozenUndefMask.countPopulation();
NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
SmallVector<int, 16> BlendMask(NumElems, -1);
SmallVector<SDValue, 16> Elts(NumElems, DAG.getUNDEF(OpEltVT));
for (unsigned i = 0; i < NumElems; ++i) {
if (UndefMask[i]) {
BlendMask[i] = -1;
continue;
}
BlendMask[i] = i;
if (!FrozenUndefMask[i])
Elts[i] = Op.getOperand(i);
else
BlendMask[i] += NumElems;
}
SDValue EltsBV = DAG.getBuildVector(VT, dl, Elts);
SDValue FrozenUndefElt = DAG.getFreeze(DAG.getUNDEF(OpEltVT));
SDValue FrozenUndefBV = DAG.getSplatBuildVector(VT, dl, FrozenUndefElt);
return DAG.getVectorShuffle(VT, dl, EltsBV, FrozenUndefBV, BlendMask);
}
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
// If the upper elts of a ymm/zmm are undef/zero then we might be better off
// lowering to a smaller build vector and padding with undef/zero.
if ((VT.is256BitVector() || VT.is512BitVector()) &&
!isFoldableUseOfShuffle(BV)) {
unsigned UpperElems = NumElems / 2;
APInt UndefOrZeroMask = UndefMask | ZeroMask;
unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.countLeadingOnes();
if (NumUpperUndefsOrZeros >= UpperElems) {
if (VT.is512BitVector() &&
NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4)))
UpperElems = NumElems - (NumElems / 4);
bool UndefUpper = UndefMask.countLeadingOnes() >= UpperElems;
MVT LowerVT = MVT::getVectorVT(EltVT, NumElems - UpperElems);
SDValue NewBV =
DAG.getBuildVector(LowerVT, dl, Op->ops().drop_back(UpperElems));
return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl);
}
}
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
return AddSub;
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
return Broadcast;
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
return BitOp;
unsigned NumZero = ZeroMask.countPopulation();
unsigned NumNonZero = NonZeroMask.countPopulation();
// If we are inserting one variable into a vector of non-zero constants, try
// to avoid loading each constant element as a scalar. Load the constants as a
// vector and then insert the variable scalar element. If insertion is not
// supported, fall back to a shuffle to get the scalar blended with the
// constants. Insertion into a zero vector is handled as a special-case
// somewhere below here.
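// For example (illustrative): lowering <1, 2, x, 4> loads <1, 2, undef, 4>
// from the constant pool and then inserts the variable scalar x at index 2.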
if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
(isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
// Create an all-constant vector. The variable element in the old
// build vector is replaced by undef in the constant vector. Save the
// variable scalar element and its index for use in the insertelement.
LLVMContext &Context = *DAG.getContext();
Type *EltType = Op.getValueType().getScalarType().getTypeForEVT(Context);
SmallVector<Constant *, 16> ConstVecOps(NumElems, UndefValue::get(EltType));
SDValue VarElt;
SDValue InsIndex;
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Elt = Op.getOperand(i);
if (auto *C = dyn_cast<ConstantSDNode>(Elt))
ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue());
else if (auto *C = dyn_cast<ConstantFPSDNode>(Elt))
ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF());
else if (!Elt.isUndef()) {
assert(!VarElt.getNode() && !InsIndex.getNode() &&
"Expected one variable element in this vector");
VarElt = Elt;
InsIndex = DAG.getVectorIdxConstant(i, dl);
}
}
Constant *CV = ConstantVector::get(ConstVecOps);
SDValue DAGConstVec = DAG.getConstantPool(CV, VT);
// The constants we just created may not be legal (eg, floating point). We
// must lower the vector right here because we can not guarantee that we'll
// legalize it before loading it. This is also why we could not just create
// a new build vector here. If the build vector contains illegal constants,
// it could get split back up into a series of insert elements.
// TODO: Improve this by using shorter loads with broadcast/VZEXT_LOAD.
SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG);
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue();
unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();
if (InsertC < NumEltsInLow128Bits)
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
// There's no good way to insert into the high elements of a >128-bit
// vector, so use shuffles to avoid an extract/insert sequence.
assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");
assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
SmallVector<int, 8> ShuffleMask;
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 0; i != NumElts; ++i)
ShuffleMask.push_back(i == InsertC ? NumElts : i);
SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);
}
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue Item = Op.getOperand(Idx);
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is.
if (Idx == 0) {
if (NumZero == 0)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
if (EltVT == MVT::i32 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
EltVT == MVT::f64 || (EltVT == MVT::i64 && Subtarget.is64Bit()) ||
(EltVT == MVT::i16 && Subtarget.hasFP16())) {
assert((VT.is128BitVector() || VT.is256BitVector() ||
VT.is512BitVector()) &&
"Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movsh, movss, movsd, movw or movd) to a
// zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
// We can't directly insert an i8 or i16 into a vector, so zero extend
// it to i32 first.
if (EltVT == MVT::i16 || EltVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
return DAG.getBitcast(VT, Item);
}
}
// Is it a vector logical left shift?
if (NumElems == 2 && Idx == 1 &&
X86::isZeroNode(Op.getOperand(0)) &&
!X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
VT, Op.getOperand(1)),
NumBits/2, DAG, *this, dl);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
return SDValue();
// Otherwise, if this is a vector with i32 or f32 elements, and the element
// is a non-constant being inserted into an element other than the low one,
// we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
// movd/movss) to move this into the low element, then shuffle it into
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
}
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1) {
if (EVTBits == 32) {
// Instead of a shuffle like this:
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
// Check if it's possible to issue this instead.
// shuffle (vload ptr), undef, <1, 1, 1, 1>
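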
unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue Item = Op.getOperand(Idx);
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
}
return SDValue();
}
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)
return SDValue();
if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget))
return V;
// See if we can use a vector load to get all of the elements.
{
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
if (SDValue LD =
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
return LD;
}
// If this is a splat of pairs of 32-bit elements, we can use a narrower
// build_vector and broadcast it.
// TODO: We could probably generalize this more.
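// Illustrative example: a v8i32 <a, b, a, b, a, b, a, b> is built as a v4i32
// <a, b, undef, undef>, bitcast to v2i64, broadcast to v4i64 and bitcast back
// to v8i32.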
if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef<SDValue> Ops) {
// Make sure all the even/odd operands match.
for (unsigned i = 2; i != NumElems; ++i)
if (Ops[i % 2] != Op.getOperand(i))
return false;
return true;
};
if (CanSplat(Op, NumElems, Ops)) {
MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64;
MVT NarrowVT = MVT::getVectorVT(EltVT, 4);
// Create a new build vector and cast to v2i64/v2f64.
SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2),
DAG.getBuildVector(NarrowVT, dl, Ops));
// Broadcast from v2i64/v2f64 and cast to final VT.
MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems / 2);
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT,
NewBV));
}
}
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
if (VT.getSizeInBits() > 128) {
MVT HVT = MVT::getVectorVT(EltVT, NumElems / 2);
// Build both the lower and upper subvector.
SDValue Lower =
DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2));
SDValue Upper = DAG.getBuildVector(
HVT, dl, Op->ops().slice(NumElems / 2, NumElems / 2));
// Recreate the wider vector with the lower and upper part.
return concatSubVectors(Lower, Upper, DAG, dl);
}
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget))
return V;
if (EltVT == MVT::i16 && NumElems == 8)
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget))
return V;
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
if (EVTBits == 32 && NumElems == 4)
if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget))
return V;
// If element VT is == 32 bits, turn it into a number of shuffles.
if (NumElems == 4 && NumZero > 0) {
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !NonZeroMask[i];
if (isZero)
Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
default: llvm_unreachable("Unexpected NonZero count");
case 0:
Ops[i] = Ops[i*2]; // Must be a zero vector.
break;
case 1:
Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
break;
case 2:
Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
break;
case 3:
Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
break;
}
}
bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
int MaskVec[] = {
Reverse1 ? 1 : 0,
Reverse1 ? 0 : 1,
static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
static_cast<int>(Reverse2 ? NumElems : NumElems+1)
};
return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec);
}
assert(Values.size() > 1 && "Expected non-undef and non-splat vector");
// Check for a build vector from mostly shuffle plus few inserting.
if (SDValue Sh = buildFromShuffleMostly(Op, DAG))
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element.
if (Subtarget.hasSSE41() && EltVT != MVT::f16) {
SDValue Result;
if (!Op.getOperand(0).isUndef())
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
else
Result = DAG.getUNDEF(VT);
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).isUndef()) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
return Result;
}
// Otherwise, expand into a number of unpckl*, start by extending each of
// our (non-undef) elements to the full vector width with the element in the
// bottom slot of the vector (which generates no code for SSE).
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
if (!Op.getOperand(i).isUndef())
Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
else
Ops[i] = DAG.getUNDEF(VT);
}
// Next, we iteratively mix elements, e.g. for v4f32:
// Step 1: unpcklps 0, 1 ==> X: <?, ?, 1, 0>
// : unpcklps 2, 3 ==> Y: <?, ?, 3, 2>
// Step 2: unpcklpd X, Y ==> <3, 2, 1, 0>
for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
// Generate scaled UNPCKL shuffle mask.
SmallVector<int, 16> Mask;
for(unsigned i = 0; i != Scale; ++i)
Mask.push_back(i);
for (unsigned i = 0; i != Scale; ++i)
Mask.push_back(NumElems+i);
Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
}
return Ops[0];
}
// 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
// TODO: Detect subvector broadcast here instead of DAG combine?
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
assert((ResVT.is256BitVector() ||
ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
unsigned NumOperands = Op.getNumOperands();
unsigned NumFreezeUndef = 0;
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
for (unsigned i = 0; i != NumOperands; ++i) {
SDValue SubVec = Op.getOperand(i);
if (SubVec.isUndef())
continue;
if (ISD::isFreezeUndef(SubVec.getNode()) && SubVec.hasOneUse())
++NumFreezeUndef;
else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
++NumZero;
else {
assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
NonZeros |= 1 << i;
++NumNonZero;
}
}
// If we have more than 2 non-zeros, build each half separately.
if (NumNonZero > 2) {
MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
ArrayRef<SDUse> Ops = Op->ops();
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
Ops.slice(0, NumOperands/2));
SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
Ops.slice(NumOperands/2));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
// Otherwise, build it up through insert_subvectors.
SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl)
: (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
: DAG.getUNDEF(ResVT));
MVT SubVT = Op.getOperand(0).getSimpleValueType();
unsigned NumSubElems = SubVT.getVectorNumElements();
for (unsigned i = 0; i != NumOperands; ++i) {
if ((NonZeros & (1 << i)) == 0)
continue;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec,
Op.getOperand(i),
DAG.getIntPtrConstant(i * NumSubElems, dl));
}
return Vec;
}
// Lower a CONCAT_VECTORS of vXi1 operands by inserting the non-zero operands
// into a zero/undef vector, using a single KSHIFTL where the generic
// insert_subvector lowering would otherwise need two kshifts.
// TODO: Merge this with LowerAVXCONCAT_VECTORS?
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG & DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
unsigned NumOperands = Op.getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
uint64_t Zeros = 0;
uint64_t NonZeros = 0;
for (unsigned i = 0; i != NumOperands; ++i) {
SDValue SubVec = Op.getOperand(i);
if (SubVec.isUndef())
continue;
assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
Zeros |= (uint64_t)1 << i;
else
NonZeros |= (uint64_t)1 << i;
}
unsigned NumElems = ResVT.getVectorNumElements();
// If we are inserting a non-zero vector and there are zeros in the LSBs and
// undefs in the MSBs, we need to emit a KSHIFTL. The generic lowering to
// insert_subvector would give us two kshifts.
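// Illustrative example: concatenating <zeroes:v2i1, X:v2i1, undef:v2i1,
// undef:v2i1> into v8i1 inserts X at bit 0 of the shift type and emits a
// single KSHIFTL by 2.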
if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros &&
Log2_64(NonZeros) != NumOperands - 1) {
MVT ShiftVT = ResVT;
if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
ShiftVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
unsigned Idx = Log2_64(NonZeros);
SDValue SubVec = Op.getOperand(Idx);
unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT,
DAG.getUNDEF(ShiftVT), SubVec,
DAG.getIntPtrConstant(0, dl));
Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
DAG.getIntPtrConstant(0, dl));
}
// If there are zero or one non-zeros we can handle this very simply.
if (NonZeros == 0 || isPowerOf2_64(NonZeros)) {
SDValue Vec = Zeros ? DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT);
if (!NonZeros)
return Vec;
unsigned Idx = Log2_64(NonZeros);
SDValue SubVec = Op.getOperand(Idx);
unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec,
DAG.getIntPtrConstant(Idx * SubVecNumElts, dl));
}
if (NumOperands > 2) {
MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
ArrayRef<SDUse> Ops = Op->ops();
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
Ops.slice(0, NumOperands/2));
SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
Ops.slice(NumOperands/2));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
assert(llvm::popcount(NonZeros) == 2 && "Simple cases not handled?");
if (ResVT.getVectorNumElements() >= 16)
return Op; // The operation is legal with KUNPCK
SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT,
DAG.getUNDEF(ResVT), Op.getOperand(0),
DAG.getIntPtrConstant(0, dl));
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1),
DAG.getIntPtrConstant(NumElems/2, dl));
}
static SDValue LowerCONCAT_VECTORS(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() == MVT::i1)
return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG);
assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
(VT.is512BitVector() && (Op.getNumOperands() == 2 ||
Op.getNumOperands() == 4)));
// AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
// 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors
return LowerAVXCONCAT_VECTORS(Op, DAG, Subtarget);
}
//===----------------------------------------------------------------------===//
// Vector shuffle lowering
//
// This is an experimental code path for lowering vector shuffles on x86. It is
// designed to handle arbitrary vector shuffles and blends, gracefully
// degrading performance as necessary. It works hard to recognize idiomatic
// shuffles and lower them to optimal instruction patterns without leaving
// a framework that allows reasonably efficient handling of all vector shuffle
// patterns.
//===----------------------------------------------------------------------===//
/// Tiny helper function to identify a no-op mask.
///
/// This is a somewhat boring predicate function. It checks whether the mask
/// array input, which is assumed to be a single-input shuffle mask of the kind
/// used by the X86 shuffle instructions (not a fully general
/// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an
/// in-place shuffle are 'no-op's.
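/// For illustration: <0, -1, 2, 3> is a no-op mask, while <1, 0, 2, 3> is not.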
static bool isNoopShuffleMask(ArrayRef<int> Mask) {
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
assert(Mask[i] >= -1 && "Out of bound mask element!");
if (Mask[i] >= 0 && Mask[i] != i)
return false;
}
return true;
}
/// Test whether there are elements crossing LaneSizeInBits lanes in this
/// shuffle mask.
///
/// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
/// and we routinely test for these.
static bool isLaneCrossingShuffleMask(unsigned LaneSizeInBits,
unsigned ScalarSizeInBits,
ArrayRef<int> Mask) {
assert(LaneSizeInBits && ScalarSizeInBits &&
(LaneSizeInBits % ScalarSizeInBits) == 0 &&
"Illegal shuffle lane size");
int LaneSize = LaneSizeInBits / ScalarSizeInBits;
int Size = Mask.size();
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
return true;
return false;
}
/// Test whether there are elements crossing 128-bit lanes in this
/// shuffle mask.
static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask);
}
/// Test whether elements in each LaneSizeInBits lane in this shuffle mask come
/// from multiple lanes - this is different to isLaneCrossingShuffleMask to
/// better support 'repeated mask + lane permute' style shuffles.
static bool isMultiLaneShuffleMask(unsigned LaneSizeInBits,
unsigned ScalarSizeInBits,
ArrayRef<int> Mask) {
assert(LaneSizeInBits && ScalarSizeInBits &&
(LaneSizeInBits % ScalarSizeInBits) == 0 &&
"Illegal shuffle lane size");
int NumElts = Mask.size();
int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits;
int NumLanes = NumElts / NumEltsPerLane;
if (NumLanes > 1) {
for (int i = 0; i != NumLanes; ++i) {
int SrcLane = -1;
for (int j = 0; j != NumEltsPerLane; ++j) {
int M = Mask[(i * NumEltsPerLane) + j];
if (M < 0)
continue;
int Lane = (M % NumElts) / NumEltsPerLane;
if (SrcLane >= 0 && SrcLane != Lane)
return true;
SrcLane = Lane;
}
}
}
return false;
}
/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// This checks a shuffle mask to see if it is performing the same
/// lane-relative shuffle in each sub-lane. This trivially implies
/// that it is also not lane-crossing. It may however involve a blend from the
/// same lane of a second vector.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
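/// For illustration, with a v8f32 type the mask <0, 8, 1, 9, 4, 12, 5, 13>
/// yields the repeated mask <0, 4, 1, 5>, where 4 and 5 denote elements taken
/// from the matching lane of the second vector.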
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
RepeatedMask.assign(LaneSize, -1);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
assert(Mask[i] == SM_SentinelUndef || Mask[i] >= 0);
if (Mask[i] < 0)
continue;
if ((Mask[i] % Size) / LaneSize != i / LaneSize)
// This entry crosses lanes, so there is no way to model this shuffle.
return false;
// Ok, handle the in-lane shuffles by detecting if and when they repeat.
// Adjust second vector indices to start at LaneSize instead of Size.
int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
: Mask[i] % LaneSize + LaneSize;
if (RepeatedMask[i % LaneSize] < 0)
// This is the first non-undef entry in this slot of a 128-bit lane.
RepeatedMask[i % LaneSize] = LocalM;
else if (RepeatedMask[i % LaneSize] != LocalM)
// Found a mismatch with the repeated mask.
return false;
}
return true;
}
/// Test whether a shuffle mask is equivalent within each 128-bit lane.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
SmallVector<int, 32> RepeatedMask;
return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
/// Test whether a shuffle mask is equivalent within each 256-bit lane.
static bool
is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
}
/// Test whether a target shuffle mask is equivalent within each sub-lane.
/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero.
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits,
unsigned EltSizeInBits,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
int LaneSize = LaneSizeInBits / EltSizeInBits;
RepeatedMask.assign(LaneSize, SM_SentinelUndef);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0));
if (Mask[i] == SM_SentinelUndef)
continue;
if (Mask[i] == SM_SentinelZero) {
if (!isUndefOrZero(RepeatedMask[i % LaneSize]))
return false;
RepeatedMask[i % LaneSize] = SM_SentinelZero;
continue;
}
if ((Mask[i] % Size) / LaneSize != i / LaneSize)
// This entry crosses lanes, so there is no way to model this shuffle.
return false;
// Handle the in-lane shuffles by detecting if and when they repeat. Adjust
// later vector indices to start at multiples of LaneSize instead of Size.
int LaneM = Mask[i] / Size;
int LocalM = (Mask[i] % LaneSize) + (LaneM * LaneSize);
if (RepeatedMask[i % LaneSize] == SM_SentinelUndef)
// This is the first non-undef entry in this slot of a 128-bit lane.
RepeatedMask[i % LaneSize] = LocalM;
else if (RepeatedMask[i % LaneSize] != LocalM)
// Found a mismatch with the repeated mask.
return false;
}
return true;
}
/// Test whether a target shuffle mask is equivalent within each sub-lane.
/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero.
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
return isRepeatedTargetShuffleMask(LaneSizeInBits, VT.getScalarSizeInBits(),
Mask, RepeatedMask);
}
/// Checks whether the vector elements referenced by two shuffle masks are
/// equivalent.
static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
int Idx, int ExpectedIdx) {
assert(0 <= Idx && Idx < MaskSize && 0 <= ExpectedIdx &&
ExpectedIdx < MaskSize && "Out of range element index");
if (!Op || !ExpectedOp || Op.getOpcode() != ExpectedOp.getOpcode())
return false;
switch (Op.getOpcode()) {
case ISD::BUILD_VECTOR:
// If the values are build vectors, we can look through them to find
// equivalent inputs that make the shuffles equivalent.
// TODO: Handle MaskSize != Op.getNumOperands()?
if (MaskSize == (int)Op.getNumOperands() &&
MaskSize == (int)ExpectedOp.getNumOperands())
return Op.getOperand(Idx) == ExpectedOp.getOperand(ExpectedIdx);
break;
case X86ISD::VBROADCAST:
case X86ISD::VBROADCAST_LOAD:
// TODO: Handle MaskSize != Op.getValueType().getVectorNumElements()?
return (Op == ExpectedOp &&
(int)Op.getValueType().getVectorNumElements() == MaskSize);
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
case X86ISD::FHSUB:
case X86ISD::PACKSS:
case X86ISD::PACKUS:
// HOP(X,X) can refer to the elt from the lower/upper half of a lane.
// TODO: Handle MaskSize != NumElts?
// TODO: Handle HOP(X,Y) vs HOP(Y,X) equivalence cases.
if (Op == ExpectedOp && Op.getOperand(0) == Op.getOperand(1)) {
MVT VT = Op.getSimpleValueType();
int NumElts = VT.getVectorNumElements();
if (MaskSize == NumElts) {
int NumLanes = VT.getSizeInBits() / 128;
int NumEltsPerLane = NumElts / NumLanes;
int NumHalfEltsPerLane = NumEltsPerLane / 2;
bool SameLane =
(Idx / NumEltsPerLane) == (ExpectedIdx / NumEltsPerLane);
bool SameElt =
(Idx % NumHalfEltsPerLane) == (ExpectedIdx % NumHalfEltsPerLane);
return SameLane && SameElt;
}
}
break;
}
return false;
}
/// Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
/// This is a fast way to test a shuffle mask against a fixed pattern:
///
/// if (isShuffleEquivalent(Mask, 3, 2, {1, 0})) { ... }
///
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask,
SDValue V1 = SDValue(),
SDValue V2 = SDValue()) {
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
for (int i = 0; i < Size; ++i) {
assert(Mask[i] >= -1 && "Out of bound mask element!");
int MaskIdx = Mask[i];
int ExpectedIdx = ExpectedMask[i];
if (0 <= MaskIdx && MaskIdx != ExpectedIdx) {
SDValue MaskV = MaskIdx < Size ? V1 : V2;
SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
if (!IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx))
return false;
}
}
return true;
}
/// Checks whether a target shuffle mask is equivalent to an explicit pattern.
///
/// The masks must be exactly the same width.
///
/// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding
/// value in ExpectedMask is always accepted. Otherwise the indices must match.
///
/// SM_SentinelZero is accepted as a valid negative index but must match in
/// both, or via a known bits test.
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
ArrayRef<int> ExpectedMask,
const SelectionDAG &DAG,
SDValue V1 = SDValue(),
SDValue V2 = SDValue()) {
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
assert(llvm::all_of(ExpectedMask,
[Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
"Illegal target shuffle mask");
// Check for out-of-range target shuffle mask indices.
if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size))
return false;
// Don't use V1/V2 if they're not the same size as the shuffle mask type.
if (V1 && V1.getValueSizeInBits() != VT.getSizeInBits())
V1 = SDValue();
if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits())
V2 = SDValue();
APInt ZeroV1 = APInt::getNullValue(Size);
APInt ZeroV2 = APInt::getNullValue(Size);
for (int i = 0; i < Size; ++i) {
int MaskIdx = Mask[i];
int ExpectedIdx = ExpectedMask[i];
if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
continue;
if (MaskIdx == SM_SentinelZero) {
// If we need this expected index to be a zero element, then update the
// relevant zero mask and perform the known bits at the end to minimize
// repeated computes.
SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
if (ExpectedV &&
Size == (int)ExpectedV.getValueType().getVectorNumElements()) {
int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
APInt &ZeroMask = ExpectedIdx < Size ? ZeroV1 : ZeroV2;
ZeroMask.setBit(BitIdx);
continue;
}
}
if (MaskIdx >= 0) {
SDValue MaskV = MaskIdx < Size ? V1 : V2;
SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx))
continue;
}
return false;
}
return (ZeroV1.isNullValue() || DAG.MaskedVectorIsZero(V1, ZeroV1)) &&
(ZeroV2.isNullValue() || DAG.MaskedVectorIsZero(V2, ZeroV2));
}
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT,
const SelectionDAG &DAG) {
if (VT != MVT::v8i32 && VT != MVT::v8f32)
return false;
SmallVector<int, 8> Unpcklwd;
createUnpackShuffleMask(MVT::v8i16, Unpcklwd, /* Lo = */ true,
/* Unary = */ false);
SmallVector<int, 8> Unpckhwd;
createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
/* Unary = */ false);
bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd, DAG) ||
isTargetShuffleEquivalent(VT, Mask, Unpckhwd, DAG));
return IsUnpackwdMask;
}
static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask,
const SelectionDAG &DAG) {
// Create 128-bit vector type based on mask size.
MVT EltVT = MVT::getIntegerVT(128 / Mask.size());
MVT VT = MVT::getVectorVT(EltVT, Mask.size());
// We can't assume a canonical shuffle mask, so try the commuted version too.
SmallVector<int, 4> CommutedMask(Mask);
ShuffleVectorSDNode::commuteMask(CommutedMask);
// Match any of unary/binary or low/high.
for (unsigned i = 0; i != 4; ++i) {
SmallVector<int, 16> UnpackMask;
createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);
if (isTargetShuffleEquivalent(VT, Mask, UnpackMask, DAG) ||
isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask, DAG))
return true;
}
return false;
}
/// Return true if a shuffle mask chooses elements identically in its top and
/// bottom halves. For example, any splat mask has the same top and bottom
/// halves. If an element is undefined in only one half of the mask, the halves
/// are not considered identical.
static bool hasIdenticalHalvesShuffleMask(ArrayRef<int> Mask) {
assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask");
unsigned HalfSize = Mask.size() / 2;
for (unsigned i = 0; i != HalfSize; ++i) {
if (Mask[i] != Mask[i + HalfSize])
return false;
}
return true;
}
/// Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
/// the ubiquitous shuffle encoding scheme used in x86 instructions for
/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for
/// example.
///
/// NB: We rely heavily on "undef" masks preserving the input lane.
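///
/// For illustration: Mask <3, 1, -1, 0> produces 0b00100111 (0x27), with the
/// undef slot keeping its identity index 2, while a mask that only uses the
/// value 1, such as <1, -1, 1, -1>, produces the full splat immediate 0x55.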
static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
// If the mask only uses one non-undef element, then fully 'splat' it to
// improve later broadcast matching.
int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin();
assert(0 <= FirstIndex && FirstIndex < 4 && "All undef shuffle mask");
int FirstElt = Mask[FirstIndex];
if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; }))
return (FirstElt << 6) | (FirstElt << 4) | (FirstElt << 2) | FirstElt;
unsigned Imm = 0;
Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0;
Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2;
Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4;
Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6;
return Imm;
}
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
SelectionDAG &DAG) {
return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
// The shuffle result is as follows:
// 0*a[0] 0*a[1] ... 0*a[n], n >= 0, where the a[] elements are in ascending
// order. Each element of Zeroable corresponds to a particular element of Mask,
// as described in the computeZeroableShuffleElements function.
//
// The function looks for a sub-mask whose non-zero elements are in increasing
// order. If such a sub-mask exists, the function returns true.
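// For illustration (v4i32): if elements 0 and 2 are zeroable and
// Mask = <0, 4, 2, 5>, the remaining elements 4 and 5 are consecutive and
// start at element 0 of the second source, so this returns true with
// IsZeroSideLeft = true.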
static bool isNonZeroElementsInOrder(const APInt &Zeroable,
ArrayRef<int> Mask, const EVT &VectorType,
bool &IsZeroSideLeft) {
int NextElement = -1;
// Check if the Mask's nonzero elements are in increasing order.
for (int i = 0, e = Mask.size(); i < e; i++) {
// Checks if the mask's zero elements are built from only zeros.
assert(Mask[i] >= -1 && "Out of bound mask element!");
if (Mask[i] < 0)
return false;
if (Zeroable[i])
continue;
// Find the lowest non-zero element.
if (NextElement < 0) {
NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
IsZeroSideLeft = NextElement != 0;
}
// Exit if the mask's non-zero elements are not in increasing order.
if (NextElement != Mask[i])
return false;
NextElement++;
}
return true;
}
/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
const int NumBytes = VT.getSizeInBits() / 8;
const int NumEltBytes = VT.getScalarSizeInBits() / 8;
assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
(Subtarget.hasAVX2() && VT.is256BitVector()) ||
(Subtarget.hasBWI() && VT.is512BitVector()));
SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
// Sign bit set in i8 mask means zero element.
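// Illustrative example: lowering a v8i16 shuffle whose first two mask entries
// are <1, 0, ...> produces a byte mask starting <2, 3, 0, 1, ...>, i.e. each
// i16 element index is expanded to its two byte indices.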
SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
SDValue V;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / NumEltBytes];
if (M < 0) {
PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
continue;
}
if (Zeroable[i / NumEltBytes]) {
PSHUFBMask[i] = ZeroMask;
continue;
}
// We can only use a single input of V1 or V2.
SDValue SrcV = (M >= Size ? V2 : V1);
if (V && V != SrcV)
return SDValue();
V = SrcV;
M %= Size;
// PSHUFB can't cross lanes, ensure this doesn't happen.
if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
return SDValue();
M = M % LaneSize;
M = M * NumEltBytes + (i % NumEltBytes);
PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
}
assert(V && "Failed to find a source input");
MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl);
// X86 has a dedicated shuffle that can be lowered to VEXPAND.
static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,
const APInt &Zeroable,
ArrayRef<int> Mask, SDValue &V1,
SDValue &V2, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
bool IsLeftZeroSide = true;
if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
IsLeftZeroSide))
return SDValue();
unsigned VEXPANDMask = (~Zeroable).getZExtValue();
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
unsigned NumElts = VT.getVectorNumElements();
assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
"Unexpected number of vector elements");
SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
Subtarget, DAG, DL);
SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask);
}
static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
unsigned &UnpackOpcode, bool IsUnary,
ArrayRef<int> TargetMask, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
int NumElts = VT.getVectorNumElements();
bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
for (int i = 0; i != NumElts; i += 2) {
int M1 = TargetMask[i + 0];
int M2 = TargetMask[i + 1];
Undef1 &= (SM_SentinelUndef == M1);
Undef2 &= (SM_SentinelUndef == M2);
Zero1 &= isUndefOrZero(M1);
Zero2 &= isUndefOrZero(M2);
}
assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
"Zeroable shuffle detected");
// Attempt to match the target mask against the unpack lo/hi mask patterns.
SmallVector<int, 64> Unpckl, Unpckh;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG, V1,
(IsUnary ? V1 : V2))) {
UnpackOpcode = X86ISD::UNPCKL;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
return true;
}
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG, V1,
(IsUnary ? V1 : V2))) {
UnpackOpcode = X86ISD::UNPCKH;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
return true;
}
// If an unary shuffle, attempt to match as an unpack lo/hi with zero.
if (IsUnary && (Zero1 || Zero2)) {
// Don't bother if we can blend instead.
if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0))
return false;
bool MatchLo = true, MatchHi = true;
for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
int M = TargetMask[i];
// Ignore if the input is known to be zero or the index is undef.
if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
(M == SM_SentinelUndef))
continue;
MatchLo &= (M == Unpckl[i]);
MatchHi &= (M == Unpckh[i]);
}
if (MatchLo || MatchHi) {
UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
return true;
}
}
// If a binary shuffle, commute and try again.
if (!IsUnary) {
ShuffleVectorSDNode::commuteMask(Unpckl);
if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG)) {
UnpackOpcode = X86ISD::UNPCKL;
std::swap(V1, V2);
return true;
}
ShuffleVectorSDNode::commuteMask(Unpckh);
if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG)) {
UnpackOpcode = X86ISD::UNPCKH;
std::swap(V1, V2);
return true;
}
}
return false;
}
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
SmallVector<int, 8> Unpckl;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);
if (isShuffleEquivalent(Mask, Unpckl, V1, V2))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
SmallVector<int, 8> Unpckh;
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, /* Unary = */ false);
if (isShuffleEquivalent(Mask, Unpckh, V1, V2))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
// Commute and try again.
ShuffleVectorSDNode::commuteMask(Unpckl);
if (isShuffleEquivalent(Mask, Unpckl, V1, V2))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);
ShuffleVectorSDNode::commuteMask(Unpckh);
if (isShuffleEquivalent(Mask, Unpckh, V1, V2))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);
return SDValue();
}
/// Check if the mask can be mapped to a preliminary shuffle (vperm 64-bit)
/// followed by unpack 256-bit.
static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SmallVector<int, 32> Unpckl, Unpckh;
createSplat2ShuffleMask(VT, Unpckl, /* Lo */ true);
createSplat2ShuffleMask(VT, Unpckh, /* Lo */ false);
unsigned UnpackOpcode;
if (isShuffleEquivalent(Mask, Unpckl, V1, V2))
UnpackOpcode = X86ISD::UNPCKL;
else if (isShuffleEquivalent(Mask, Unpckh, V1, V2))
UnpackOpcode = X86ISD::UNPCKH;
else
return SDValue();
// This is a "natural" unpack operation (rather than the 128-bit sectored
// operation implemented by AVX). We need to rearrange 64-bit chunks of the
// input in order to use the x86 instruction.
V1 = DAG.getVectorShuffle(MVT::v4f64, DL, DAG.getBitcast(MVT::v4f64, V1),
DAG.getUNDEF(MVT::v4f64), {0, 2, 1, 3});
V1 = DAG.getBitcast(VT, V1);
return DAG.getNode(UnpackOpcode, DL, VT, V1, V1);
}
// Check if the mask can be mapped to a TRUNCATE or VTRUNC, truncating the
// source into the lower elements and zeroing the upper elements.
static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
ArrayRef<int> Mask, const APInt &Zeroable,
const X86Subtarget &Subtarget) {
if (!VT.is512BitVector() && !Subtarget.hasVLX())
return false;
unsigned NumElts = Mask.size();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned MaxScale = 64 / EltSizeInBits;
for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
unsigned SrcEltBits = EltSizeInBits * Scale;
if (SrcEltBits < 32 && !Subtarget.hasBWI())
continue;
unsigned NumSrcElts = NumElts / Scale;
if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale))
continue;
unsigned UpperElts = NumElts - NumSrcElts;
if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
SrcVT = MVT::getIntegerVT(EltSizeInBits * Scale);
SrcVT = MVT::getVectorVT(SrcVT, NumSrcElts);
DstVT = MVT::getIntegerVT(EltSizeInBits);
if ((NumSrcElts * EltSizeInBits) >= 128) {
// ISD::TRUNCATE
DstVT = MVT::getVectorVT(DstVT, NumSrcElts);
} else {
// X86ISD::VTRUNC
DstVT = MVT::getVectorVT(DstVT, 128 / EltSizeInBits);
}
return true;
}
return false;
}
// Helper to create TRUNCATE/VTRUNC nodes, optionally with zero/undef upper
// element padding to the final DstVT.
static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src,
const X86Subtarget &Subtarget,
SelectionDAG &DAG, bool ZeroUppers) {
MVT SrcVT = Src.getSimpleValueType();
MVT DstSVT = DstVT.getScalarType();
unsigned NumDstElts = DstVT.getVectorNumElements();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned DstEltSizeInBits = DstVT.getScalarSizeInBits();
if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
return SDValue();
// Perform a direct ISD::TRUNCATE if possible.
if (NumSrcElts == NumDstElts)
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
if (NumSrcElts > NumDstElts) {
MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
return extractSubVector(Trunc, 0, DAG, DL, DstVT.getSizeInBits());
}
if ((NumSrcElts * DstEltSizeInBits) >= 128) {
MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
return widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
DstVT.getSizeInBits());
}
// Non-VLX targets must truncate from a 512-bit type, so we need to
// widen, truncate and then possibly extract the original subvector.
if (!Subtarget.hasVLX() && !SrcVT.is512BitVector()) {
SDValue NewSrc = widenSubVector(Src, ZeroUppers, Subtarget, DAG, DL, 512);
return getAVX512TruncNode(DL, DstVT, NewSrc, Subtarget, DAG, ZeroUppers);
}
// Fallback to a X86ISD::VTRUNC, padding if necessary.
MVT TruncVT = MVT::getVectorVT(DstSVT, 128 / DstEltSizeInBits);
SDValue Trunc = DAG.getNode(X86ISD::VTRUNC, DL, TruncVT, Src);
if (DstVT != TruncVT)
Trunc = widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
DstVT.getSizeInBits());
return Trunc;
}
// Try to lower trunc+vector_shuffle to a vpmovdb or a vpmovdw instruction.
//
// An example is the following:
//
// t0: ch = EntryToken
// t2: v4i64,ch = CopyFromReg t0, Register:v4i64 %0
// t25: v4i32 = truncate t2
// t41: v8i16 = bitcast t25
// t21: v8i16 = BUILD_VECTOR undef:i16, undef:i16, undef:i16, undef:i16,
// Constant:i16<0>, Constant:i16<0>, Constant:i16<0>, Constant:i16<0>
// t51: v8i16 = vector_shuffle<0,2,4,6,12,13,14,15> t41, t21
// t18: v2i64 = bitcast t51
//
// One can just use a single vpmovdw instruction; without avx512vl we need to
// use the zmm variant and extract the lower subvector, padding with zeroes.
// TODO: Merge with lowerShuffleAsVTRUNC.
static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type");
if (!Subtarget.hasAVX512())
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned MaxScale = 64 / EltSizeInBits;
for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
unsigned SrcEltBits = EltSizeInBits * Scale;
unsigned NumSrcElts = NumElts / Scale;
unsigned UpperElts = NumElts - NumSrcElts;
if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
// Attempt to find a matching source truncation, but as a fallback VLX
// cases can use the VPMOV directly.
SDValue Src = peekThroughBitcasts(V1);
if (Src.getOpcode() == ISD::TRUNCATE &&
Src.getScalarValueSizeInBits() == SrcEltBits) {
Src = Src.getOperand(0);
} else if (Subtarget.hasVLX()) {
MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
Src = DAG.getBitcast(SrcVT, Src);
// Don't do this if PACKSS/PACKUS could perform it cheaper.
if (Scale == 2 &&
((DAG.ComputeNumSignBits(Src) > EltSizeInBits) ||
(DAG.computeKnownBits(Src).countMinLeadingZeros() >= EltSizeInBits)))
return SDValue();
} else
return SDValue();
// VPMOVWB is only available with avx512bw.
if (!Subtarget.hasBWI() && Src.getScalarValueSizeInBits() < 32)
return SDValue();
bool UndefUppers = isUndefInRange(Mask, NumSrcElts, UpperElts);
return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
}
return SDValue();
}
// Attempt to match binary shuffle patterns as a truncate.
static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unexpected VTRUNC type");
if (!Subtarget.hasAVX512())
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned MaxScale = 64 / EltSizeInBits;
for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
// TODO: Support non-BWI VPMOVWB truncations?
unsigned SrcEltBits = EltSizeInBits * Scale;
if (SrcEltBits < 32 && !Subtarget.hasBWI())
continue;
// Match shuffle <Ofs,Ofs+Scale,Ofs+2*Scale,..,undef_or_zero,undef_or_zero>
// Bail if the V2 elements are undef.
unsigned NumHalfSrcElts = NumElts / Scale;
unsigned NumSrcElts = 2 * NumHalfSrcElts;
for (unsigned Offset = 0; Offset != Scale; ++Offset) {
if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, Offset, Scale) ||
isUndefInRange(Mask, NumHalfSrcElts, NumHalfSrcElts))
continue;
// The elements beyond the truncation must be undef/zero.
unsigned UpperElts = NumElts - NumSrcElts;
if (UpperElts > 0 &&
!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
bool UndefUppers =
UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);
// For offset truncations, ensure that the concat is cheap.
if (Offset) {
auto IsCheapConcat = [&](SDValue Lo, SDValue Hi) {
if (Lo.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Hi.getOpcode() == ISD::EXTRACT_SUBVECTOR)
return Lo.getOperand(0) == Hi.getOperand(0);
if (ISD::isNormalLoad(Lo.getNode()) &&
ISD::isNormalLoad(Hi.getNode())) {
auto *LDLo = cast<LoadSDNode>(Lo);
auto *LDHi = cast<LoadSDNode>(Hi);
return DAG.areNonVolatileConsecutiveLoads(
LDHi, LDLo, Lo.getValueType().getStoreSize(), 1);
}
return false;
};
if (!IsCheapConcat(V1, V2))
continue;
}
// As we're using both sources then we need to concat them together
// and truncate from the double-sized src.
MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);
SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
Src = DAG.getBitcast(SrcVT, Src);
// Shift the offset'd elements into place for the truncation.
// TODO: Use getTargetVShiftByConstNode.
if (Offset)
Src = DAG.getNode(
X86ISD::VSRLI, DL, SrcVT, Src,
DAG.getTargetConstant(Offset * EltSizeInBits, DL, MVT::i8));
return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
}
}
return SDValue();
}
/// Check whether a compaction lowering can be done by dropping even/odd
/// elements and compute how many times even/odd elements must be dropped.
///
/// This handles shuffles which take every Nth element where N is a power of
/// two. Example shuffle masks:
///
/// (even)
/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14
/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
/// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12
/// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28
/// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8
/// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24
///
/// (odd)
/// N = 1: 1, 3, 5, 7, 9, 11, 13, 15, 1, 3, 5, 7, 9, 11, 13, 15
/// N = 1: 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
///
/// Any of these lanes can of course be undef.
///
/// This routine only supports N <= 3.
/// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here
/// for larger N.
///
/// \returns N above, or the number of times even/odd elements must be dropped
/// if there is such a number. Otherwise returns zero.
static int canLowerByDroppingElements(ArrayRef<int> Mask, bool MatchEven,
bool IsSingleInput) {
// The modulus for the shuffle vector entries is based on whether this is
// a single input or not.
int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
"We should only be called with masks with a power-of-2 size!");
uint64_t ModMask = (uint64_t)ShuffleModulus - 1;
int Offset = MatchEven ? 0 : 1;
// We track whether the input is viable for all power-of-2 strides 2^1, 2^2,
// and 2^3 simultaneously. This is because we may have ambiguity with
// partially undef inputs.
bool ViableForN[3] = {true, true, true};
for (int i = 0, e = Mask.size(); i < e; ++i) {
// Ignore undef lanes, we'll optimistically collapse them to the pattern we
// want.
if (Mask[i] < 0)
continue;
bool IsAnyViable = false;
for (unsigned j = 0; j != std::size(ViableForN); ++j)
if (ViableForN[j]) {
uint64_t N = j + 1;
// The shuffle mask must be equal to (i * 2^N) % M.
if ((uint64_t)(Mask[i] - Offset) == (((uint64_t)i << N) & ModMask))
IsAnyViable = true;
else
ViableForN[j] = false;
}
// Early exit if we exhaust the possible powers of two.
if (!IsAnyViable)
break;
}
for (unsigned j = 0; j != std::size(ViableForN); ++j)
if (ViableForN[j])
return j + 1;
// Return 0 as there is no viable power of two.
return 0;
}
// X86 has dedicated pack instructions that can handle specific truncation
// operations: PACKSS and PACKUS.
// Checks for compaction shuffle masks if MaxStages > 1.
// TODO: Add support for matching multiple PACKSS/PACKUS stages.
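// Illustrative example: a v16i8 mask <0, 2, 4, ..., 30> matches the binary
// pack pattern taking the low byte of every i16 element of the two v8i16
// sources; it becomes PACKUS if the high bytes are known zero, or PACKSS if
// they are known sign bits.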
static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
unsigned &PackOpcode, ArrayRef<int> TargetMask,
const SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned MaxStages = 1) {
unsigned NumElts = VT.getVectorNumElements();
unsigned BitSize = VT.getScalarSizeInBits();
assert(0 < MaxStages && MaxStages <= 3 && (BitSize << MaxStages) <= 64 &&
"Illegal maximum compaction");
auto MatchPACK = [&](SDValue N1, SDValue N2, MVT PackVT) {
unsigned NumSrcBits = PackVT.getScalarSizeInBits();
unsigned NumPackedBits = NumSrcBits - BitSize;
N1 = peekThroughBitcasts(N1);
N2 = peekThroughBitcasts(N2);
unsigned NumBits1 = N1.getScalarValueSizeInBits();
unsigned NumBits2 = N2.getScalarValueSizeInBits();
bool IsZero1 = llvm::isNullOrNullSplat(N1, /*AllowUndefs*/ false);
bool IsZero2 = llvm::isNullOrNullSplat(N2, /*AllowUndefs*/ false);
if ((!N1.isUndef() && !IsZero1 && NumBits1 != NumSrcBits) ||
(!N2.isUndef() && !IsZero2 && NumBits2 != NumSrcBits))
return false;
if (Subtarget.hasSSE41() || BitSize == 8) {
APInt ZeroMask = APInt::getHighBitsSet(NumSrcBits, NumPackedBits);
if ((N1.isUndef() || IsZero1 || DAG.MaskedValueIsZero(N1, ZeroMask)) &&
(N2.isUndef() || IsZero2 || DAG.MaskedValueIsZero(N2, ZeroMask))) {
V1 = N1;
V2 = N2;
SrcVT = PackVT;
PackOpcode = X86ISD::PACKUS;
return true;
}
}
bool IsAllOnes1 = llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false);
bool IsAllOnes2 = llvm::isAllOnesOrAllOnesSplat(N2, /*AllowUndefs*/ false);
if ((N1.isUndef() || IsZero1 || IsAllOnes1 ||
DAG.ComputeNumSignBits(N1) > NumPackedBits) &&
(N2.isUndef() || IsZero2 || IsAllOnes2 ||
DAG.ComputeNumSignBits(N2) > NumPackedBits)) {
V1 = N1;
V2 = N2;
SrcVT = PackVT;
PackOpcode = X86ISD::PACKSS;
return true;
}
return false;
};
// Attempt to match against wider and wider compaction patterns.
for (unsigned NumStages = 1; NumStages <= MaxStages; ++NumStages) {
MVT PackSVT = MVT::getIntegerVT(BitSize << NumStages);
MVT PackVT = MVT::getVectorVT(PackSVT, NumElts >> NumStages);
// Try binary shuffle.
SmallVector<int, 32> BinaryMask;
createPackShuffleMask(VT, BinaryMask, false, NumStages);
if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, DAG, V1, V2))
if (MatchPACK(V1, V2, PackVT))
return true;
// Try unary shuffle.
SmallVector<int, 32> UnaryMask;
createPackShuffleMask(VT, UnaryMask, true, NumStages);
if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, DAG, V1))
if (MatchPACK(V1, V1, PackVT))
return true;
}
return false;
}
static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
SDValue V1, SDValue V2, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT PackVT;
unsigned PackOpcode;
unsigned SizeBits = VT.getSizeInBits();
unsigned EltBits = VT.getScalarSizeInBits();
unsigned MaxStages = Log2_32(64 / EltBits);
if (!matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
Subtarget, MaxStages))
return SDValue();
unsigned CurrentEltBits = PackVT.getScalarSizeInBits();
unsigned NumStages = Log2_32(CurrentEltBits / EltBits);
// Don't lower multi-stage packs on AVX512, truncation is better.
if (NumStages != 1 && SizeBits == 128 && Subtarget.hasVLX())
return SDValue();
// Pack to the largest type possible:
// vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB.
unsigned MaxPackBits = 16;
if (CurrentEltBits > 16 &&
(PackOpcode == X86ISD::PACKSS || Subtarget.hasSSE41()))
MaxPackBits = 32;
// Repeatedly pack down to the target size.
SDValue Res;
for (unsigned i = 0; i != NumStages; ++i) {
unsigned SrcEltBits = std::min(MaxPackBits, CurrentEltBits);
unsigned NumSrcElts = SizeBits / SrcEltBits;
MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
MVT DstSVT = MVT::getIntegerVT(SrcEltBits / 2);
MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
MVT DstVT = MVT::getVectorVT(DstSVT, NumSrcElts * 2);
Res = DAG.getNode(PackOpcode, DL, DstVT, DAG.getBitcast(SrcVT, V1),
DAG.getBitcast(SrcVT, V2));
V1 = V2 = Res;
CurrentEltBits /= 2;
}
assert(Res && Res.getValueType() == VT &&
"Failed to lower compaction shuffle");
return Res;
}
/// Try to emit a bitmask instruction for a shuffle.
///
/// This handles cases where we can model a blend exactly as a bitmask due to
/// one of the inputs being zeroable.
static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT MaskVT = VT;
MVT EltVT = VT.getVectorElementType();
SDValue Zero, AllOnes;
// Use f64 if i64 isn't legal.
if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
EltVT = MVT::f64;
MaskVT = MVT::getVectorVT(EltVT, Mask.size());
}
MVT LogicVT = VT;
if (EltVT == MVT::f32 || EltVT == MVT::f64) {
Zero = DAG.getConstantFP(0.0, DL, EltVT);
APFloat AllOnesValue =
APFloat::getAllOnesValue(SelectionDAG::EVTToAPFloatSemantics(EltVT));
AllOnes = DAG.getConstantFP(AllOnesValue, DL, EltVT);
LogicVT =
MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
} else {
Zero = DAG.getConstant(0, DL, EltVT);
AllOnes = DAG.getAllOnesConstant(DL, EltVT);
}
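// Build the bitmask constant: all-ones for lanes that keep an input element,
// zero for the zeroable lanes.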
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SDValue V;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Zeroable[i])
continue;
if (Mask[i] % Size != i)
return SDValue(); // Not a blend.
if (!V)
V = Mask[i] < Size ? V1 : V2;
else if (V != (Mask[i] < Size ? V1 : V2))
return SDValue(); // Can only let one input through the mask.
VMaskOps[i] = AllOnes;
}
if (!V)
return SDValue(); // No non-zeroable elements!
SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
VMask = DAG.getBitcast(LogicVT, VMask);
V = DAG.getBitcast(LogicVT, V);
SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
return DAG.getBitcast(VT, And);
}
/// Try to emit a blend instruction for a shuffle using bit math.
///
/// This is used as a fallback approach when first class blend instructions are
/// unavailable. Currently it is only suitable for integer vectors, but could
/// be generalized for floating point vectors if desirable.
static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT.isInteger() && "Only supports integer vector types!");
MVT EltVT = VT.getVectorElementType();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> MaskOps;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size)
return SDValue(); // Shuffled input!
MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero);
}
SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);
V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2);
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG);
static bool matchShuffleAsBlend(SDValue V1, SDValue V2,
MutableArrayRef<int> Mask,
const APInt &Zeroable, bool &ForceV1Zero,
bool &ForceV2Zero, uint64_t &BlendMask) {
bool V1IsZeroOrUndef =
V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZeroOrUndef =
V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());
BlendMask = 0;
ForceV1Zero = false, ForceV2Zero = false;
assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
if (M == i ||
(0 <= M && M < Size && IsElementEquivalent(Size, V1, V1, M, i))) {
Mask[i] = i;
continue;
}
if (M == (i + Size) ||
(Size <= M && IsElementEquivalent(Size, V2, V2, M - Size, i))) {
BlendMask |= 1ull << i;
Mask[i] = i + Size;
continue;
}
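// This lane must be zero - take it from whichever input is already zero or
// undef, forcing that input to become an explicit zero vector.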
if (Zeroable[i]) {
if (V1IsZeroOrUndef) {
ForceV1Zero = true;
Mask[i] = i;
continue;
}
if (V2IsZeroOrUndef) {
ForceV2Zero = true;
BlendMask |= 1ull << i;
Mask[i] = i + Size;
continue;
}
}
return false;
}
return true;
}
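/// Scale a blend mask so that each original mask bit covers Scale adjacent
/// elements, e.g. 0b0101 with Scale == 2 becomes 0b00110011.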
static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
int Scale) {
uint64_t ScaledMask = 0;
for (int i = 0; i != Size; ++i)
if (BlendMask & (1ull << i))
ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
return ScaledMask;
}
/// Try to emit a blend instruction for a shuffle.
///
/// This doesn't do any checks for the availability of instructions for blending
/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
/// be matched in the backend with the type given. What it does check for is
/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Original,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 64> Mask(Original);
if (!matchShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
BlendMask))
return SDValue();
// Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
if (ForceV1Zero)
V1 = getZeroVector(VT, Subtarget, DAG, DL);
if (ForceV2Zero)
V2 = getZeroVector(VT, Subtarget, DAG, DL);
unsigned NumElts = VT.getVectorNumElements();
switch (VT.SimpleTy) {
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
[[fallthrough]];
case MVT::v4f64:
case MVT::v8f32:
assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
[[fallthrough]];
case MVT::v2f64:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getTargetConstant(BlendMask, DL, MVT::i8));
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// We can lower these with PBLENDW which is mirrored across 128-bit lanes.
assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
BlendMask = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 8)
BlendMask |= 1ull << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getTargetConstant(BlendMask, DL, MVT::i8));
}
// Use PBLENDW for lower/upper lanes and then blend lanes.
// TODO - we should allow 2 PBLENDW here and leave shuffle combine to
// merge to VSELECT where useful.
uint64_t LoMask = BlendMask & 0xFF;
uint64_t HiMask = (BlendMask >> 8) & 0xFF;
if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getTargetConstant(LoMask, DL, MVT::i8));
SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getTargetConstant(HiMask, DL, MVT::i8));
return DAG.getVectorShuffle(
MVT::v16i16, DL, Lo, Hi,
{0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
}
[[fallthrough]];
}
case MVT::v32i8:
assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
[[fallthrough]];
case MVT::v16i8: {
assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return Masked;
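// With AVX512BW+VL we can lower this directly as a masked move, selecting
// between V1 and V2 with a k-register mask built from BlendMask.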
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
MVT IntegerType = MVT::getIntegerVT(std::max<unsigned>(NumElts, 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
// If we have VPTERNLOG, we can use that as a bit blend.
if (Subtarget.hasVLX())
if (SDValue BitBlend =
lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return BitBlend;
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
// This form of blend is always done on bytes. Compute the byte vector
// type.
MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
// x86 allows load folding with blendvb from the 2nd source operand. But
// we are still using LLVM select here (see comment below), so that's V1.
// If V2 can be load-folded and V1 cannot be load-folded, then commute to
// allow that load-folding possibility.
if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
}
// Compute the VSELECT mask. Note that VSELECT is really confusing in the
// mix of LLVM's code generator and the x86 backend. We tell the code
// generator that boolean values in the elements of an x86 vector register
// are -1 for true and 0 for false. We then use the LLVM semantics of 'true'
// mapping a select to operand #1, and 'false' mapping to operand #2. The
// reality in x86 is that vector masks (pre-AVX-512) use only the high bit
// of the element (the remaining are ignored) and 0 in that high bit would
// mean operand #1 while 1 in the high bit would mean operand #2. So while
// the LLVM model for boolean values in vector elements gets the relevant
// bit set, it is set backwards and over constrained relative to x86's
// actual model.
SmallVector<SDValue, 32> VSELECTMask;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
for (int j = 0; j < Scale; ++j)
VSELECTMask.push_back(
Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
: DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,
MVT::i8));
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
return DAG.getBitcast(
VT,
DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask),
V1, V2));
}
case MVT::v16f32:
case MVT::v8f64:
case MVT::v8i64:
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8: {
// Attempt to lower to a bitmask if we can. Only if not optimizing for size.
bool OptForSize = DAG.shouldOptForSize();
if (!OptForSize) {
if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return Masked;
}
// Otherwise load an immediate into a GPR, cast to k-register, and use a
// masked move.
MVT IntegerType = MVT::getIntegerVT(std::max<unsigned>(NumElts, 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
default:
llvm_unreachable("Not a supported integer vector type!");
}
}
/// Try to lower as a blend of elements from two inputs followed by
/// a single-input permutation.
///
/// This matches the pattern where we can blend elements from two inputs and
/// then reduce the shuffle to a single-input permutation.
static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG,
bool ImmBlends = false) {
// We build up the blend mask while checking whether a blend is a viable way
// to reduce the shuffle.
SmallVector<int, 32> BlendMask(Mask.size(), -1);
SmallVector<int, 32> PermuteMask(Mask.size(), -1);
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] < 0)
continue;
assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds.");
if (BlendMask[Mask[i] % Size] < 0)
BlendMask[Mask[i] % Size] = Mask[i];
else if (BlendMask[Mask[i] % Size] != Mask[i])
return SDValue(); // Can't blend in the needed input!
PermuteMask[i] = Mask[i] % Size;
}
// If only immediate blends, then bail if the blend mask can't be widened to
// i16.
unsigned EltSize = VT.getScalarSizeInBits();
if (ImmBlends && EltSize == 8 && !canWidenShuffleElements(BlendMask))
return SDValue();
SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
/// Try to lower as an unpack of elements from two inputs followed by
/// a single-input permutation.
///
/// This matches the pattern where we can unpack elements from two inputs and
/// then reduce the shuffle to a single-input (wider) permutation.
static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
int NumElts = Mask.size();
int NumLanes = VT.getSizeInBits() / 128;
int NumLaneElts = NumElts / NumLanes;
int NumHalfLaneElts = NumLaneElts / 2;
bool MatchLo = true, MatchHi = true;
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
// Determine UNPCKL/UNPCKH type and operand order.
for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
for (int Elt = 0; Elt != NumLaneElts; ++Elt) {
int M = Mask[Lane + Elt];
if (M < 0)
continue;
SDValue &Op = Ops[Elt & 1];
if (M < NumElts && (Op.isUndef() || Op == V1))
Op = V1;
else if (NumElts <= M && (Op.isUndef() || Op == V2))
Op = V2;
else
return SDValue();
int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts;
MatchLo &= isUndefOrInRange(M, Lo, Mid) ||
isUndefOrInRange(M, NumElts + Lo, NumElts + Mid);
MatchHi &= isUndefOrInRange(M, Mid, Hi) ||
isUndefOrInRange(M, NumElts + Mid, NumElts + Hi);
if (!MatchLo && !MatchHi)
return SDValue();
}
}
assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI");
// Now check that each pair of elts come from the same unpack pair
// and set the permute mask based on each pair.
// TODO - Investigate cases where we permute individual elements.
SmallVector<int, 32> PermuteMask(NumElts, -1);
for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
for (int Elt = 0; Elt != NumLaneElts; Elt += 2) {
int M0 = Mask[Lane + Elt + 0];
int M1 = Mask[Lane + Elt + 1];
if (0 <= M0 && 0 <= M1 &&
(M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts))
return SDValue();
if (0 <= M0)
PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts));
if (0 <= M1)
PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1;
}
}
unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops);
return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask);
}
/// Try to lower a shuffle as a permute of the inputs followed by an
/// UNPCK instruction.
///
/// This specifically targets cases where we end up with alternating between
/// the two inputs, and so can permute them into something that feeds a single
/// UNPCK instruction. Note that this routine only targets integer vectors
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
assert(Mask.size() >= 2 && "Single element masks are invalid.");
// This routine only supports 128-bit integer dual input vectors.
if (VT.isFloatingPoint() || !VT.is128BitVector() || V2.isUndef())
return SDValue();
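// Count how many mask elements reference the low half vs the high half of
// their source vector - this decides between UNPCKL and UNPCKH.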
int NumLoInputs =
count_if(Mask, [Size](int M) { return M >= 0 && M % Size < Size / 2; });
int NumHiInputs =
count_if(Mask, [Size](int M) { return M % Size >= Size / 2; });
bool UnpackLo = NumLoInputs >= NumHiInputs;
auto TryUnpack = [&](int ScalarSize, int Scale) {
SmallVector<int, 16> V1Mask((unsigned)Size, -1);
SmallVector<int, 16> V2Mask((unsigned)Size, -1);
for (int i = 0; i < Size; ++i) {
if (Mask[i] < 0)
continue;
// Each element of the unpack contains Scale elements from this mask.
int UnpackIdx = i / Scale;
// We only handle the case where V1 feeds the first slots of the unpack.
// We rely on canonicalization to ensure this is the case.
if ((UnpackIdx % 2 == 0) != (Mask[i] < Size))
return SDValue();
// Setup the mask for this input. The indexing is tricky as we have to
// handle the unpack stride.
SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask;
VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] =
Mask[i] % Size;
}
// If we will have to shuffle both inputs to use the unpack, check whether
// we can just unpack first and shuffle the result. If so, skip this unpack.
if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) &&
!isNoopShuffleMask(V2Mask))
return SDValue();
// Shuffle the inputs into place.
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
// Cast the inputs to the type we will use to unpack them.
MVT UnpackVT =
MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale);
V1 = DAG.getBitcast(UnpackVT, V1);
V2 = DAG.getBitcast(UnpackVT, V2);
// Unpack the inputs and cast the result back to the desired type.
return DAG.getBitcast(
VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
UnpackVT, V1, V2));
};
// We try each unpack from the largest to the smallest, looking for one
// that fits this mask.
int OrigScalarSize = VT.getScalarSizeInBits();
for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2)
if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
return Unpack;
// If we're shuffling with a zero vector then we're better off not doing
// VECTOR_SHUFFLE(UNPCK()) as we lose track of those zero elements.
if (ISD::isBuildVectorAllZeros(V1.getNode()) ||
ISD::isBuildVectorAllZeros(V2.getNode()))
return SDValue();
// If none of the unpack-rooted lowerings worked (or were profitable) try an
// initial unpack.
if (NumLoInputs == 0 || NumHiInputs == 0) {
assert((NumLoInputs > 0 || NumHiInputs > 0) &&
"We have to have *some* inputs!");
int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;
// FIXME: We could consider the total complexity of the permute of each
// possible unpacking. Or at the least we should consider how many
// half-crossings are created.
// FIXME: We could consider commuting the unpacks.
SmallVector<int, 32> PermMask((unsigned)Size, -1);
for (int i = 0; i < Size; ++i) {
if (Mask[i] < 0)
continue;
assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");
PermMask[i] =
2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
}
return DAG.getVectorShuffle(
VT, DL,
DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, DL, VT,
V1, V2),
DAG.getUNDEF(VT), PermMask);
}
return SDValue();
}
/// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
/// permuting the elements of the result in place.
static SDValue lowerShuffleAsByteRotateAndPermute(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
(VT.is256BitVector() && !Subtarget.hasAVX2()) ||
(VT.is512BitVector() && !Subtarget.hasBWI()))
return SDValue();
// We don't currently support lane crossing permutes.
if (is128BitLaneCrossingShuffleMask(VT, Mask))
return SDValue();
int Scale = VT.getScalarSizeInBits() / 8;
int NumLanes = VT.getSizeInBits() / 128;
int NumElts = VT.getVectorNumElements();
int NumEltsPerLane = NumElts / NumLanes;
// Determine range of mask elts.
bool Blend1 = true;
bool Blend2 = true;
std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN);
std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN);
for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
int M = Mask[Lane + Elt];
if (M < 0)
continue;
if (M < NumElts) {
Blend1 &= (M == (Lane + Elt));
assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
M = M % NumEltsPerLane;
Range1.first = std::min(Range1.first, M);
Range1.second = std::max(Range1.second, M);
} else {
M -= NumElts;
Blend2 &= (M == (Lane + Elt));
assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
M = M % NumEltsPerLane;
Range2.first = std::min(Range2.first, M);
Range2.second = std::max(Range2.second, M);
}
}
}
// Bail if we don't need both elements.
// TODO - it might be worth doing this for unary shuffles if the permute
// can be widened.
if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) ||
!(0 <= Range2.first && Range2.second < NumEltsPerLane))
return SDValue();
if (VT.getSizeInBits() > 128 && (Blend1 || Blend2))
return SDValue();
// Rotate the 2 ops so we can access both ranges, then permute the result.
auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) {
MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
SDValue Rotate = DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
DAG.getBitcast(ByteVT, Lo),
DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8)));
SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
int M = Mask[Lane + Elt];
if (M < 0)
continue;
if (M < NumElts)
PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);
else
PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);
}
}
return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask);
};
// Check if the ranges are small enough to rotate from either direction.
if (Range2.second < Range1.first)
return RotateAndPermute(V1, V2, Range1.first, 0);
if (Range1.second < Range2.first)
return RotateAndPermute(V2, V1, Range2.first, NumElts);
return SDValue();
}
static bool isBroadcastShuffleMask(ArrayRef<int> Mask) {
return isUndefOrEqual(Mask, 0);
}
static bool isNoopOrBroadcastShuffleMask(ArrayRef<int> Mask) {
return isNoopShuffleMask(Mask) || isBroadcastShuffleMask(Mask);
}
/// Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
/// This matches the extremely common pattern for handling combined
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
/// operations. It will try to pick the best arrangement of shuffles and
/// blends. For vXi8/vXi16 shuffles we may use unpack instead of blend.
static SDValue lowerShuffleAsDecomposedShuffleMerge(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
int NumElts = Mask.size();
int NumLanes = VT.getSizeInBits() / 128;
int NumEltsPerLane = NumElts / NumLanes;
// Shuffle the input elements into the desired positions in V1 and V2 and
// unpack/blend them together.
bool IsAlternating = true;
SmallVector<int, 32> V1Mask(NumElts, -1);
SmallVector<int, 32> V2Mask(NumElts, -1);
SmallVector<int, 32> FinalMask(NumElts, -1);
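// Split the mask into a per-input shuffle plus a final blend mask, tracking
// whether V1 elements land only in even positions and V2 elements only in
// odd positions (an alternating pattern).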
for (int i = 0; i < NumElts; ++i) {
int M = Mask[i];
if (M >= 0 && M < NumElts) {
V1Mask[i] = M;
FinalMask[i] = i;
IsAlternating &= (i & 1) == 0;
} else if (M >= NumElts) {
V2Mask[i] = M - NumElts;
FinalMask[i] = i + NumElts;
IsAlternating &= (i & 1) == 1;
}
}
// If the shuffle only demands the 0'th element of \p Input (possibly in
// positions other than the 0'th), broadcast that input and change
// \p InputMask to a no-op (identity) mask.
auto canonicalizeBroadcastableInput = [DL, VT, &Subtarget,
&DAG](SDValue &Input,
MutableArrayRef<int> InputMask) {
unsigned EltSizeInBits = Input.getScalarValueSizeInBits();
if (!Subtarget.hasAVX2() && (!Subtarget.hasAVX() || EltSizeInBits < 32 ||
!X86::mayFoldLoad(Input, Subtarget)))
return;
if (isNoopShuffleMask(InputMask))
return;
assert(isBroadcastShuffleMask(InputMask) &&
"Expected to demand only the 0'th element.");
Input = DAG.getNode(X86ISD::VBROADCAST, DL, VT, Input);
for (auto I : enumerate(InputMask)) {
int &InputMaskElt = I.value();
if (InputMaskElt >= 0)
InputMaskElt = I.index();
}
};
// Currently, we may need to produce one shuffle per input, and blend results.
// It is possible that the shuffle for one of the inputs is already a no-op.
// See if we can simplify non-no-op shuffles into broadcasts,
// which we consider to be strictly better than an arbitrary shuffle.
if (isNoopOrBroadcastShuffleMask(V1Mask) &&
isNoopOrBroadcastShuffleMask(V2Mask)) {
canonicalizeBroadcastableInput(V1, V1Mask);
canonicalizeBroadcastableInput(V2, V2Mask);
}
// Try to lower with the simpler initial blend/unpack/rotate strategies unless
// one of the input shuffles would be a no-op. We prefer to shuffle inputs as
// the shuffle may be able to fold with a load or other benefit. However, when
// we would have to do 2x as many shuffles in order to achieve this, a 2-input
// pre-shuffle first is a better strategy.
if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
// Only prefer immediate blends to unpack/rotate.
if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
DAG, true))
return BlendPerm;
if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask,
DAG))
return UnpackPerm;
if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute(
DL, VT, V1, V2, Mask, Subtarget, DAG))
return RotatePerm;
// Unpack/rotate failed - try again with variable blends.
if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
DAG))
return BlendPerm;
if (VT.getScalarSizeInBits() >= 32)
if (SDValue PermUnpack = lowerShuffleAsPermuteAndUnpack(
DL, VT, V1, V2, Mask, Subtarget, DAG))
return PermUnpack;
}
// If the final mask is an alternating blend of vXi8/vXi16, convert to an
// UNPCKL(SHUFFLE, SHUFFLE) pattern.
// TODO: It doesn't have to be alternating - but each lane mustn't have more
// than half the elements coming from each source.
if (IsAlternating && VT.getScalarSizeInBits() < 32) {
V1Mask.assign(NumElts, -1);
V2Mask.assign(NumElts, -1);
FinalMask.assign(NumElts, -1);
for (int i = 0; i != NumElts; i += NumEltsPerLane)
for (int j = 0; j != NumEltsPerLane; ++j) {
int M = Mask[i + j];
if (M >= 0 && M < NumElts) {
V1Mask[i + (j / 2)] = M;
FinalMask[i + j] = i + (j / 2);
} else if (M >= NumElts) {
V2Mask[i + (j / 2)] = M - NumElts;
FinalMask[i + j] = i + (j / 2) + NumElts;
}
}
}
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask);
}
/// Try to lower a vector shuffle as a bit rotation.
///
/// Look for a repeated rotation pattern in each sub group.
/// Returns an ISD::ROTL element rotation amount or -1 on failure.
static int matchShuffleAsBitRotate(ArrayRef<int> Mask, int NumSubElts) {
int NumElts = Mask.size();
assert((NumElts % NumSubElts) == 0 && "Illegal shuffle mask");
int RotateAmt = -1;
for (int i = 0; i != NumElts; i += NumSubElts) {
for (int j = 0; j != NumSubElts; ++j) {
int M = Mask[i + j];
if (M < 0)
continue;
if (!isInRange(M, i, i + NumSubElts))
return -1;
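// Compute the rotation amount implied by this element - every element in
// every sub group must agree on the same amount.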
int Offset = (NumSubElts - (M - (i + j))) % NumSubElts;
if (0 <= RotateAmt && Offset != RotateAmt)
return -1;
RotateAmt = Offset;
}
}
return RotateAmt;
}
static int matchShuffleAsBitRotate(MVT &RotateVT, int EltSizeInBits,
const X86Subtarget &Subtarget,
ArrayRef<int> Mask) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
assert(EltSizeInBits < 64 && "Can't rotate 64-bit integers");
// AVX512 only has vXi32/vXi64 rotates, so limit the rotation sub group size.
int MinSubElts = Subtarget.hasAVX512() ? std::max(32 / EltSizeInBits, 2) : 2;
int MaxSubElts = 64 / EltSizeInBits;
for (int NumSubElts = MinSubElts; NumSubElts <= MaxSubElts; NumSubElts *= 2) {
int RotateAmt = matchShuffleAsBitRotate(Mask, NumSubElts);
if (RotateAmt < 0)
continue;
int NumElts = Mask.size();
MVT RotateSVT = MVT::getIntegerVT(EltSizeInBits * NumSubElts);
RotateVT = MVT::getVectorVT(RotateSVT, NumElts / NumSubElts);
return RotateAmt * EltSizeInBits;
}
return -1;
}
/// Lower shuffle using X86ISD::VROTLI rotations.
static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// Only XOP + AVX512 targets have bit rotation instructions.
// If we at least have SSSE3 (PSHUFB) then we shouldn't attempt to use this.
bool IsLegal =
(VT.is128BitVector() && Subtarget.hasXOP()) || Subtarget.hasAVX512();
if (!IsLegal && Subtarget.hasSSE3())
return SDValue();
MVT RotateVT;
int RotateAmt = matchShuffleAsBitRotate(RotateVT, VT.getScalarSizeInBits(),
Subtarget, Mask);
if (RotateAmt < 0)
return SDValue();
// For pre-SSSE3 targets, if we are shuffling vXi8 elts then ISD::ROTL,
// expanded to OR(SRL,SHL), will be more efficient, but if they can
// widen to vXi16 or more then the existing lowering will be better.
if (!IsLegal) {
if ((RotateAmt % 16) == 0)
return SDValue();
// TODO: Use getTargetVShiftByConstNode.
unsigned ShlAmt = RotateAmt;
unsigned SrlAmt = RotateVT.getScalarSizeInBits() - RotateAmt;
V1 = DAG.getBitcast(RotateVT, V1);
SDValue SHL = DAG.getNode(X86ISD::VSHLI, DL, RotateVT, V1,
DAG.getTargetConstant(ShlAmt, DL, MVT::i8));
SDValue SRL = DAG.getNode(X86ISD::VSRLI, DL, RotateVT, V1,
DAG.getTargetConstant(SrlAmt, DL, MVT::i8));
SDValue Rot = DAG.getNode(ISD::OR, DL, RotateVT, SHL, SRL);
return DAG.getBitcast(VT, Rot);
}
SDValue Rot =
DAG.getNode(X86ISD::VROTLI, DL, RotateVT, DAG.getBitcast(RotateVT, V1),
DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
return DAG.getBitcast(VT, Rot);
}
/// Try to match a vector shuffle as an element rotation.
///
/// This is used to support PALIGNR for SSSE3 or VALIGND/Q for AVX512.
static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2,
ArrayRef<int> Mask) {
int NumElts = Mask.size();
// We need to detect various ways of spelling a rotation:
// [11, 12, 13, 14, 15, 0, 1, 2]
// [-1, 12, 13, 14, -1, -1, 1, -1]
// [-1, -1, -1, -1, -1, -1, 1, 2]
// [ 3, 4, 5, 6, 7, 8, 9, 10]
// [-1, 4, 5, 6, -1, -1, 9, -1]
// [-1, 4, 5, 6, -1, -1, -1, -1]
int Rotation = 0;
SDValue Lo, Hi;
for (int i = 0; i < NumElts; ++i) {
int M = Mask[i];
assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
"Unexpected mask index.");
if (M < 0)
continue;
// Determine where a rotated vector would have started.
int StartIdx = i - (M % NumElts);
if (StartIdx == 0)
// The identity rotation isn't interesting, stop.
return -1;
// If we found the tail of a vector the rotation must be the missing
// front. If we found the head of a vector, it must be how much of the
// head.
int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
if (Rotation == 0)
Rotation = CandidateRotation;
else if (Rotation != CandidateRotation)
// The rotations don't match, so we can't match this mask.
return -1;
// Compute which value this mask is pointing at.
SDValue MaskV = M < NumElts ? V1 : V2;
// Compute which of the two target values this index should be assigned
// to. This reflects whether the high elements are remaining or the low
// elements are remaining.
SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
// Either set up this value if we've not encountered it before, or check
// that it remains consistent.
if (!TargetV)
TargetV = MaskV;
else if (TargetV != MaskV)
// This may be a rotation, but it pulls from the inputs in some
// unsupported interleaving.
return -1;
}
// Check that we successfully analyzed the mask, and normalize the results.
assert(Rotation != 0 && "Failed to locate a viable rotation!");
assert((Lo || Hi) && "Failed to find a rotated input vector!");
if (!Lo)
Lo = Hi;
else if (!Hi)
Hi = Lo;
V1 = Lo;
V2 = Hi;
return Rotation;
}
/// Try to lower a vector shuffle as a byte rotation.
///
/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
/// try to generically lower a vector shuffle through such a pattern. It
/// does not check for the profitability of lowering either as PALIGNR or
/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
/// This matches shuffle vectors that look like:
///
/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask) {
// Don't accept any shuffles with zero elements.
if (isAnyZero(Mask))
return -1;
// PALIGNR works on 128-bit lanes.
SmallVector<int, 16> RepeatedMask;
if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
return -1;
int Rotation = matchShuffleAsElementRotate(V1, V2, RepeatedMask);
if (Rotation <= 0)
return -1;
// PALIGNR rotates bytes, so we need to scale the
// rotation based on how many bytes are in the vector lane.
int NumElts = RepeatedMask.size();
int Scale = 16 / NumElts;
return Rotation * Scale;
}
static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
SDValue Lo = V1, Hi = V2;
int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
if (ByteRotation <= 0)
return SDValue();
// Cast the inputs to i8 vector of correct length to match PALIGNR or
// PSLLDQ/PSRLDQ.
MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
Lo = DAG.getBitcast(ByteVT, Lo);
Hi = DAG.getBitcast(ByteVT, Hi);
// SSSE3 targets can use the palignr instruction.
if (Subtarget.hasSSSE3()) {
assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
"512-bit PALIGNR requires BWI instructions");
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
DAG.getTargetConstant(ByteRotation, DL, MVT::i8)));
}
assert(VT.is128BitVector() &&
"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");
assert(ByteVT == MVT::v16i8 &&
"SSE2 rotate lowering only needed for v16i8!");
// Default SSE2 implementation
int LoByteShift = 16 - ByteRotation;
int HiByteShift = ByteRotation;
SDValue LoShift =
DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
DAG.getTargetConstant(LoByteShift, DL, MVT::i8));
SDValue HiShift =
DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
DAG.getTargetConstant(HiByteShift, DL, MVT::i8));
return DAG.getBitcast(VT,
DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
/// Try to lower a vector shuffle as a dword/qword rotation.
///
/// AVX512 has VALIGND/VALIGNQ instructions that will do an arbitrary
/// rotation of the concatenation of two vectors; this routine will
/// try to generically lower a vector shuffle through such a pattern.
///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
"Only 32-bit and 64-bit elements are supported!");
// 128/256-bit vectors are only supported with VLX.
assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))
&& "VLX required for 128/256-bit vectors");
SDValue Lo = V1, Hi = V2;
int Rotation = matchShuffleAsElementRotate(Lo, Hi, Mask);
if (Rotation <= 0)
return SDValue();
return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
DAG.getTargetConstant(Rotation, DL, MVT::i8));
}
/// Try to lower a vector shuffle as a byte shift sequence.
static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
assert(VT.is128BitVector() && "Only 128-bit vectors supported");
// We need a shuffle that has zeros at one/both ends and a sequential
// shuffle from one source within.
unsigned ZeroLo = Zeroable.countTrailingOnes();
unsigned ZeroHi = Zeroable.countLeadingOnes();
if (!ZeroLo && !ZeroHi)
return SDValue();
unsigned NumElts = Mask.size();
unsigned Len = NumElts - (ZeroLo + ZeroHi);
if (!isSequentialOrUndefInRange(Mask, ZeroLo, Len, Mask[ZeroLo]))
return SDValue();
unsigned Scale = VT.getScalarSizeInBits() / 8;
ArrayRef<int> StubMask = Mask.slice(ZeroLo, Len);
if (!isUndefOrInRange(StubMask, 0, NumElts) &&
!isUndefOrInRange(StubMask, NumElts, 2 * NumElts))
return SDValue();
SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
Res = DAG.getBitcast(MVT::v16i8, Res);
// Use VSHLDQ/VSRLDQ ops to zero the ends of a vector and leave an
// inner sequential set of elements, possibly offset:
// 01234567 --> zzzzzz01 --> 1zzzzzzz
// 01234567 --> 4567zzzz --> zzzzz456
// 01234567 --> z0123456 --> 3456zzzz --> zz3456zz
if (ZeroLo == 0) {
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8));
} else if (ZeroHi == 0) {
unsigned Shift = Mask[ZeroLo] % NumElts;
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
} else if (!Subtarget.hasSSSE3()) {
// If we don't have PSHUFB then it's worth avoiding an AND constant mask
// by performing 3 byte shifts. Shuffle combining can kick in above that.
// TODO: There may be some cases where VSH{LR}DQ+PAND is still better.
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Shift += Mask[ZeroLo] % NumElts;
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
} else
return SDValue();
return DAG.getBitcast(VT, Res);
}
/// Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
/// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function
/// matches elements from one of the input vectors shuffled to the left or
/// right with zeroable elements 'shifted in'. It handles both the strictly
/// bit-wise element shifts and the byte shift across an entire 128-bit double
/// quad word lane.
///
/// PSLL : (little-endian) left bit shift.
/// [ zz, 0, zz, 2 ]
/// [ -1, 4, zz, -1 ]
/// PSRL : (little-endian) right bit shift.
/// [ 1, zz, 3, zz]
/// [ -1, -1, 7, zz]
/// PSLLDQ : (little-endian) left byte shift
/// [ zz, 0, 1, 2, 3, 4, 5, 6]
/// [ zz, zz, -1, -1, 2, 3, 4, -1]
/// [ zz, zz, zz, zz, zz, zz, -1, 1]
/// PSRLDQ : (little-endian) right byte shift
/// [ 5, 6, 7, zz, zz, zz, zz, zz]
/// [ -1, 5, 6, 7, zz, zz, zz, zz]
/// [ 1, 2, -1, -1, -1, -1, zz, zz]
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
unsigned ScalarSizeInBits, ArrayRef<int> Mask,
int MaskOffset, const APInt &Zeroable,
const X86Subtarget &Subtarget) {
int Size = Mask.size();
unsigned SizeInBits = Size * ScalarSizeInBits;
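// Check that the positions shifted into each Scale-sized group (the low
// positions for a left shift, the high positions for a right shift) are all
// zeroable.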
auto CheckZeros = [&](int Shift, int Scale, bool Left) {
for (int i = 0; i < Size; i += Scale)
for (int j = 0; j < Shift; ++j)
if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
return false;
return true;
};
auto MatchShift = [&](int Shift, int Scale, bool Left) {
for (int i = 0; i != Size; i += Scale) {
unsigned Pos = Left ? i + Shift : i;
unsigned Low = Left ? i : i + Shift;
unsigned Len = Scale - Shift;
if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset))
return -1;
}
int ShiftEltBits = ScalarSizeInBits * Scale;
bool ByteShift = ShiftEltBits > 64;
Opcode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
: (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
// Normalize the scale for byte shifts to still produce an i64 element
// type.
Scale = ByteShift ? Scale / 2 : Scale;
// We need to round trip through the appropriate type for the shift.
MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
: MVT::getVectorVT(ShiftSVT, Size / Scale);
return (int)ShiftAmt;
};
// SSE/AVX supports logical shifts up to 64-bit integers - so we can just
// keep doubling the size of the integer elements up to that. We can
// then shift the elements of the integer vector by whole multiples of
// their width within the elements of the larger integer vector. Test each
// multiple to see if we can find a match with the moved element indices
// and that the shifted in elements are all zeroable.
unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128);
for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
for (int Shift = 1; Shift != Scale; ++Shift)
for (bool Left : {true, false})
if (CheckZeros(Shift, Scale, Left)) {
int ShiftAmt = MatchShift(Shift, Scale, Left);
if (0 < ShiftAmt)
return ShiftAmt;
}
// no match
return -1;
}
static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
MVT ShiftVT;
SDValue V = V1;
unsigned Opcode;
// Try to match shuffle against V1 shift.
int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
Mask, 0, Zeroable, Subtarget);
// If V1 failed, try to match shuffle against V2 shift.
if (ShiftAmt < 0) {
ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
Mask, Size, Zeroable, Subtarget);
V = V2;
}
if (ShiftAmt < 0)
return SDValue();
assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
"Illegal integer vector type");
V = DAG.getBitcast(ShiftVT, V);
V = DAG.getNode(Opcode, DL, ShiftVT, V,
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
return DAG.getBitcast(VT, V);
}
// EXTRQ: Extract Len elements from lower half of source, starting at Idx.
// Remainder of lower half result is zero and upper half is all undef.
static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask, uint64_t &BitLen,
uint64_t &BitIdx, const APInt &Zeroable) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
assert(!Zeroable.isAllOnes() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefUpperHalf(Mask))
return false;
// Determine the extraction length from the part of the
// lower half that isn't zeroable.
int Len = HalfSize;
for (; Len > 0; --Len)
if (!Zeroable[Len - 1])
break;
assert(Len > 0 && "Zeroable shuffle mask");
// Attempt to match first Len sequential elements from the lower half.
SDValue Src;
int Idx = -1;
for (int i = 0; i != Len; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
SDValue &V = (M < Size ? V1 : V2);
M = M % Size;
// The extracted elements must start at a valid index and all mask
// elements must be in the lower half.
if (i > M || M >= HalfSize)
return false;
if (Idx < 0 || (Src == V && Idx == (M - i))) {
Src = V;
Idx = M - i;
continue;
}
return false;
}
if (!Src || Idx < 0)
return false;
assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
V1 = Src;
return true;
}
// INSERTQ: Extract lowest Len elements from lower half of second source and
// insert over first source, starting at Idx.
// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask, uint64_t &BitLen,
uint64_t &BitIdx) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
// Upper half must be undefined.
if (!isUndefUpperHalf(Mask))
return false;
for (int Idx = 0; Idx != HalfSize; ++Idx) {
SDValue Base;
// Attempt to match first source from mask before insertion point.
if (isUndefInRange(Mask, 0, Idx)) {
/* EMPTY */
} else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
Base = V1;
} else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
Base = V2;
} else {
continue;
}
// Extend the extraction length looking to match both the insertion of
// the second source and the remaining elements of the first.
for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
SDValue Insert;
int Len = Hi - Idx;
// Match insertion.
if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
Insert = V1;
} else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
Insert = V2;
} else {
continue;
}
// Match the remaining elements of the lower half.
if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
/* EMPTY */
} else if ((!Base || (Base == V1)) &&
isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
Base = V1;
} else if ((!Base || (Base == V2)) &&
isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
Size + Hi)) {
Base = V2;
} else {
continue;
}
BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
V1 = Base;
V2 = Insert;
return true;
}
}
return false;
}
/// Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, SelectionDAG &DAG) {
uint64_t BitLen, BitIdx;
if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
V2 ? V2 : DAG.getUNDEF(VT),
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return SDValue();
}
/// Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget. The extended elements are consecutive and
/// may start from an offset element index in the input; to avoid excess
/// shuffling the offset must either be in the bottom lane or at the start of
/// a higher lane. All extended elements must be from
/// the same lane.
static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
int EltBits = VT.getScalarSizeInBits();
int NumElements = VT.getVectorNumElements();
int NumEltsPerLane = 128 / EltBits;
int OffsetLane = Offset / NumEltsPerLane;
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Only 8, 16, and 32 bit elements can be extended.");
assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
assert(0 <= Offset && "Extension offset must be positive.");
assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
"Extension offset must be in the first lane or start an upper lane.");
// Check that an index is in same lane as the base offset.
auto SafeOffset = [&](int Idx) {
return OffsetLane == (Idx / NumEltsPerLane);
};
// Shift along an input so that the offset base moves to the first element.
auto ShuffleOffset = [&](SDValue V) {
if (!Offset)
return V;
SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
for (int i = 0; i * Scale < NumElements; ++i) {
int SrcIdx = i + Offset;
ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
}
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
};
// Found a valid a/zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
if (Subtarget.hasSSE41()) {
// Not worth offsetting 128-bit vectors if scale == 2, a pattern using
// PUNPCK will catch this in a later shuffle match.
if (Offset && Scale == 2 && VT.is128BitVector())
return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
InputV = getEXTEND_VECTOR_INREG(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND,
DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
// For any extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
if (AnyExt && EltBits == 32) {
int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
-1};
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
if (AnyExt && EltBits == 16 && Scale > 2) {
int PSHUFDMask[4] = {Offset / 2, -1,
SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
int PSHUFWMask[4] = {1, -1, -1, -1};
unsigned OddEvenOp = (Offset & 1) ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
return DAG.getBitcast(
VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
DAG.getBitcast(MVT::v8i16, InputV),
getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
}
// The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
// to 64-bits.
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
assert(VT.is128BitVector() && "Unexpected vector width!");
int LoIdx = Offset * EltBits;
SDValue Lo = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getTargetConstant(EltBits, DL, MVT::i8),
DAG.getTargetConstant(LoIdx, DL, MVT::i8)));
if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
return DAG.getBitcast(VT, Lo);
int HiIdx = (Offset + 1) * EltBits;
SDValue Hi = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getTargetConstant(EltBits, DL, MVT::i8),
DAG.getTargetConstant(HiIdx, DL, MVT::i8)));
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
}
// If this would require more than 2 unpack instructions to expand, use
// pshufb when available. We can only use more than 2 unpack instructions
// when zero extending i8 elements which also makes it easier to use pshufb.
if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) {
assert(NumElements == 16 && "Unexpected byte vector width!");
SDValue PSHUFBMask[16];
for (int i = 0; i < 16; ++i) {
int Idx = Offset + (i / Scale);
if ((i % Scale == 0 && SafeOffset(Idx))) {
PSHUFBMask[i] = DAG.getConstant(Idx, DL, MVT::i8);
continue;
}
PSHUFBMask[i] =
AnyExt ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(0x80, DL, MVT::i8);
}
InputV = DAG.getBitcast(MVT::v16i8, InputV);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
DAG.getBuildVector(MVT::v16i8, DL, PSHUFBMask)));
}
// If we are extending from an offset, ensure we start on a boundary that
// we can unpack from.
int AlignToUnpack = Offset % (NumElements / Scale);
if (AlignToUnpack) {
SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
for (int i = AlignToUnpack; i < NumElements; ++i)
ShMask[i - AlignToUnpack] = i;
InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);
Offset -= AlignToUnpack;
}
// Otherwise emit a sequence of unpacks.
do {
unsigned UnpackLoHi = X86ISD::UNPCKL;
if (Offset >= (NumElements / 2)) {
UnpackLoHi = X86ISD::UNPCKH;
Offset -= (NumElements / 2);
}
MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
: getZeroVector(InputVT, Subtarget, DAG, DL);
InputV = DAG.getBitcast(InputVT, InputV);
InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext);
Scale /= 2;
EltBits *= 2;
NumElements /= 2;
} while (Scale > 1);
return DAG.getBitcast(VT, InputV);
}
/// Try to lower a vector shuffle as a zero extension on any microarch.
///
/// This routine will try to do everything in its power to cleverly lower
/// a shuffle which happens to match the pattern of a zero extend. It doesn't
/// check for the profitability of this lowering, it tries to aggressively
/// match this pattern. It will use all of the micro-architectural details it
/// can to emit an efficient lowering. It handles both blends with all-zero
/// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to
/// masking out later).
///
/// The reason we have dedicated lowering for zext-style shuffles is that they
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Bits = VT.getSizeInBits();
int NumLanes = Bits / 128;
int NumElements = VT.getVectorNumElements();
int NumEltsPerLane = NumElements / NumLanes;
assert(VT.getScalarSizeInBits() <= 32 &&
"Exceeds 32-bit integer zero extension limit");
assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
// Define a helper function to check a particular ext-scale and lower to it if
// valid.
auto Lower = [&](int Scale) -> SDValue {
SDValue InputV;
bool AnyExt = true;
int Offset = 0;
int Matches = 0;
for (int i = 0; i < NumElements; ++i) {
int M = Mask[i];
if (M < 0)
continue; // Valid anywhere but doesn't tell us anything.
if (i % Scale != 0) {
// Each of the extended elements need to be zeroable.
if (!Zeroable[i])
return SDValue();
// We no longer are in the anyext case.
AnyExt = false;
continue;
}
// Each of the base elements needs to be consecutive indices into the
// same input vector.
SDValue V = M < NumElements ? V1 : V2;
M = M % NumElements;
if (!InputV) {
InputV = V;
Offset = M - (i / Scale);
} else if (InputV != V)
return SDValue(); // Flip-flopping inputs.
// Offset must start in the lowest 128-bit lane or at the start of an
// upper lane.
// FIXME: Is it ever worth allowing a negative base offset?
if (!((0 <= Offset && Offset < NumEltsPerLane) ||
(Offset % NumEltsPerLane) == 0))
return SDValue();
// If we are offsetting, all referenced entries must come from the same
// lane.
if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane))
return SDValue();
if ((M % NumElements) != (Offset + (i / Scale)))
return SDValue(); // Non-consecutive strided elements.
Matches++;
}
// If we fail to find an input, we have a zero-shuffle which should always
// have already been handled.
// FIXME: Maybe handle this here in case during blending we end up with one?
if (!InputV)
return SDValue();
// If we are offsetting, don't extend if we only match a single input, we
// can always do better by using a basic PSHUF or PUNPCK.
if (Offset != 0 && Matches < 2)
return SDValue();
return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt,
InputV, Mask, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
assert(Bits % 64 == 0 &&
"The number of bits in a vector must be divisible by 64 on x86!");
int NumExtElements = Bits / 64;
// Each iteration, try extending the elements half as much, but into twice as
// many elements.
for (; NumExtElements < NumElements; NumExtElements *= 2) {
assert(NumElements % NumExtElements == 0 &&
"The input vector size must be divisible by the extended size.");
if (SDValue V = Lower(NumElements / NumExtElements))
return V;
}
// General extends failed, but 128-bit vectors may be able to use MOVQ.
if (Bits != 128)
return SDValue();
// Returns one of the source operands if the shuffle can be reduced to a
// MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits.
auto CanZExtLowHalf = [&]() {
for (int i = NumElements / 2; i != NumElements; ++i)
if (!Zeroable[i])
return SDValue();
if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0))
return V1;
if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements))
return V2;
return SDValue();
};
if (SDValue V = CanZExtLowHalf()) {
V = DAG.getBitcast(MVT::v2i64, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
return DAG.getBitcast(VT, V);
}
// No viable ext lowering found.
return SDValue();
}
/// Try to get a scalar value for a specific element of a vector.
///
/// Looks through BUILD_VECTOR and SCALAR_TO_VECTOR nodes to find a scalar.
static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
SelectionDAG &DAG) {
MVT VT = V.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
V = peekThroughBitcasts(V);
// If the bitcasts shift the element size, we can't extract an equivalent
// element from it.
MVT NewVT = V.getSimpleValueType();
if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) {
// Ensure the scalar operand is the same size as the destination.
// FIXME: Add support for scalar truncation where possible.
SDValue S = V.getOperand(Idx);
if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits())
return DAG.getBitcast(EltVT, S);
}
return SDValue();
}
/// Helper to test for a load that can be folded with x86 shuffles.
///
/// This is particularly important because the set of instructions varies
/// significantly based on whether the operand is a load or not.
static bool isShuffleFoldableLoad(SDValue V) {
return V->hasOneUse() &&
ISD::isNON_EXTLoad(peekThroughOneUseBitcasts(V).getNode());
}
template<typename T>
static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) {
return VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16();
}
template<typename T>
bool X86TargetLowering::isSoftFP16(T VT) const {
return ::isSoftFP16(VT, Subtarget);
}
/// Try to lower insertion of a single element into a zero vector.
///
/// This is a common pattern that we have especially efficient patterns to lower
/// across all subtarget feature sets.
static SDValue lowerShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
if (isSoftFP16(EltVT, Subtarget))
return SDValue();
int V2Index =
find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
Mask.begin();
bool IsV1Zeroable = true;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (i != V2Index && !Zeroable[i]) {
IsV1Zeroable = false;
break;
}
// Check for a single input from a SCALAR_TO_VECTOR node.
// FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
// all the smarts here sunk into that routine. However, the current
// lowering of BUILD_VECTOR makes that nearly impossible until the old
// vector shuffle lowering is dead.
SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
DAG);
if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || (EltVT == MVT::i16 && !Subtarget.hasFP16())) {
// Using zext to expand a narrow element won't work for non-zero
// insertions.
if (!IsV1Zeroable)
return SDValue();
// Zero-extend directly to i32.
ExtVT = MVT::getVectorVT(MVT::i32, ExtVT.getSizeInBits() / 32);
V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
EltVT == MVT::i16) {
// Either not inserting from the low element of the input or the input
// element size is too small to use VZEXT_MOVL to clear the high bits.
return SDValue();
}
if (!IsV1Zeroable) {
// If V1 can't be treated as a zero vector we have fewer options to lower
// this. We can't cheaply support integer vectors or insertion at a non-zero
// index, and the V1 elements can't be permuted in any way.
assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
if (!VT.isFloatingPoint() || V2Index != 0)
return SDValue();
SmallVector<int, 8> V1Mask(Mask);
V1Mask[V2Index] = -1;
if (!isNoopShuffleMask(V1Mask))
return SDValue();
if (!VT.is128BitVector())
return SDValue();
// Otherwise, use MOVSD, MOVSS or MOVSH.
unsigned MovOpc = 0;
if (EltVT == MVT::f16)
MovOpc = X86ISD::MOVSH;
else if (EltVT == MVT::f32)
MovOpc = X86ISD::MOVSS;
else if (EltVT == MVT::f64)
MovOpc = X86ISD::MOVSD;
else
llvm_unreachable("Unsupported floating point element type to handle!");
return DAG.getNode(MovOpc, DL, ExtVT, V1, V2);
}
// This lowering only works for the low element with floating point vectors.
if (VT.isFloatingPoint() && V2Index != 0)
return SDValue();
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
V2 = DAG.getBitcast(VT, V2);
if (V2Index != 0) {
// If we have 4 or fewer lanes we can cheaply shuffle the element into
// the desired position. Otherwise it is more efficient to do a vector
// shift left. We know that we can do a vector shift left because all
// the inputs are zero.
if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
V2Shuffle[V2Index] = 0;
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
V2 = DAG.getBitcast(MVT::v16i8, V2);
V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
DAG.getTargetConstant(
V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
V2 = DAG.getBitcast(VT, V2);
}
}
return V2;
}
/// Try to lower a broadcast of a single, truncated, integer element coming
/// from a scalar_to_vector/build_vector node \p V0 with larger elements.
///
/// This assumes we have AVX2.
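/// For example, broadcasting i8 element 5 from a v4i32 build_vector \p V0
/// gives Scale = 4: the scalar is build_vector operand 5 / 4 = 1, shifted
/// right by (5 % 4) * 8 = 8 bits, truncated to i8 and then broadcast.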
static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0,
int BroadcastIdx,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX2() &&
"We can only lower integer broadcasts with AVX2!");
MVT EltVT = VT.getVectorElementType();
MVT V0VT = V0.getSimpleValueType();
assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");
MVT V0EltVT = V0VT.getVectorElementType();
if (!V0EltVT.isInteger())
return SDValue();
const unsigned EltSize = EltVT.getSizeInBits();
const unsigned V0EltSize = V0EltVT.getSizeInBits();
// This is only a truncation if the original element type is larger.
if (V0EltSize <= EltSize)
return SDValue();
assert(((V0EltSize % EltSize) == 0) &&
"Scalar type sizes must all be powers of 2 on x86!");
const unsigned V0Opc = V0.getOpcode();
const unsigned Scale = V0EltSize / EltSize;
const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
V0Opc != ISD::BUILD_VECTOR)
return SDValue();
SDValue Scalar = V0.getOperand(V0BroadcastIdx);
// If we're extracting non-least-significant bits, shift so we can truncate.
// Hopefully, we can fold away the trunc/srl/load into the broadcast.
// Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer
// vpbroadcast+vmovd+shr to vpshufb(m)+vmovd.
if (const int OffsetIdx = BroadcastIdx % Scale)
Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
DAG.getConstant(OffsetIdx * EltSize, DL, MVT::i8));
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
}
/// Test whether this can be lowered with a single SHUFPS instruction.
///
/// This is used to disable more specialized lowerings when the shufps lowering
/// will happen to be efficient.
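/// For example, Mask = {0, 1, 6, 7} can be a single SHUFPS (low half from V1,
/// high half from V2), while Mask = {0, 4, 1, 5} mixes both inputs within the
/// low half and cannot.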
static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
// This routine only handles 128-bit shufps.
assert(Mask.size() == 4 && "Unsupported mask size!");
assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");
// To lower with a single SHUFPS we need to have the low half and high half
// each requiring a single input.
if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
return false;
if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
return false;
return true;
}
/// Test whether the specified input (0 or 1) is in-place blended by the
/// given mask.
///
/// This returns true if the elements from a particular input are already in the
/// slot required by the given mask and require no permutation.
static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {
assert((Input == 0 || Input == 1) && "Only two inputs to shuffles.");
int Size = Mask.size();
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i)
return false;
return true;
}
/// If we are extracting two 128-bit halves of a vector and shuffling the
/// result, match that to a 256-bit AVX2 vperm* instruction to avoid a
/// multi-shuffle lowering.
static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
SDValue N1, ArrayRef<int> Mask,
SelectionDAG &DAG) {
MVT VT = N0.getSimpleValueType();
assert((VT.is128BitVector() &&
(VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) &&
"VPERM* family of shuffles requires 32-bit or 64-bit elements");
// Check that both sources are extracts of the same source vector.
if (!N0.hasOneUse() || !N1.hasOneUse() ||
N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
N1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
N0.getOperand(0) != N1.getOperand(0))
return SDValue();
SDValue WideVec = N0.getOperand(0);
MVT WideVT = WideVec.getSimpleValueType();
if (!WideVT.is256BitVector())
return SDValue();
// Match extracts of each half of the wide source vector. Commute the shuffle
// if the extract of the low half is N1.
unsigned NumElts = VT.getVectorNumElements();
SmallVector<int, 4> NewMask(Mask);
const APInt &ExtIndex0 = N0.getConstantOperandAPInt(1);
const APInt &ExtIndex1 = N1.getConstantOperandAPInt(1);
if (ExtIndex1 == 0 && ExtIndex0 == NumElts)
ShuffleVectorSDNode::commuteMask(NewMask);
else if (ExtIndex0 != 0 || ExtIndex1 != NumElts)
return SDValue();
// Final bailout: if the mask is simple, we are better off using an extract
// and a simple narrow shuffle. Prefer extract+unpack(h/l)ps to vpermps
// because that avoids a constant load from memory.
if (NumElts == 4 &&
(isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask, DAG)))
return SDValue();
// Extend the shuffle mask with undef elements.
NewMask.append(NumElts, -1);
// shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0
SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT),
NewMask);
// This is free: ymm -> xmm.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf,
DAG.getIntPtrConstant(0, DL));
}
/// Try to lower broadcast of a single element.
///
/// For convenience, this code also bundles all of the subtarget feature set
/// filtering. While a little annoying to re-dispatch on type here, there isn't
/// a convenient way to factor it out.
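/// For example, a v2f64 splat of element 0 lowers to MOVDDUP on SSE3, while
/// an integer splat such as v4i32 {2, 2, 2, 2} needs AVX2 (VPBROADCASTD)
/// before this lowering applies.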
static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
(Subtarget.hasAVX() && VT.isFloatingPoint()) ||
(Subtarget.hasAVX2() && VT.isInteger())))
return SDValue();
// With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
// we can only broadcast from a register with AVX2.
unsigned NumEltBits = VT.getScalarSizeInBits();
unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2())
? X86ISD::MOVDDUP
: X86ISD::VBROADCAST;
bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2();
// Check that the mask is a broadcast.
int BroadcastIdx = getSplatIndex(Mask);
if (BroadcastIdx < 0)
return SDValue();
assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
"a sorted mask where the broadcast "
"comes from V1.");
// Go up the chain of (vector) values to find a scalar load that we can
// combine with the broadcast.
// TODO: Combine this logic with findEltLoadSrc() used by
// EltsFromConsecutiveLoads().
int BitOffset = BroadcastIdx * NumEltBits;
SDValue V = V1;
for (;;) {
switch (V.getOpcode()) {
case ISD::BITCAST: {
V = V.getOperand(0);
continue;
}
case ISD::CONCAT_VECTORS: {
int OpBitWidth = V.getOperand(0).getValueSizeInBits();
int OpIdx = BitOffset / OpBitWidth;
V = V.getOperand(OpIdx);
BitOffset %= OpBitWidth;
continue;
}
case ISD::EXTRACT_SUBVECTOR: {
// The extraction index adds to the existing offset.
unsigned EltBitWidth = V.getScalarValueSizeInBits();
unsigned Idx = V.getConstantOperandVal(1);
unsigned BeginOffset = Idx * EltBitWidth;
BitOffset += BeginOffset;
V = V.getOperand(0);
continue;
}
case ISD::INSERT_SUBVECTOR: {
SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
int EltBitWidth = VOuter.getScalarValueSizeInBits();
int Idx = (int)V.getConstantOperandVal(2);
int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements();
int BeginOffset = Idx * EltBitWidth;
int EndOffset = BeginOffset + NumSubElts * EltBitWidth;
if (BeginOffset <= BitOffset && BitOffset < EndOffset) {
BitOffset -= BeginOffset;
V = VInner;
} else {
V = VOuter;
}
continue;
}
}
break;
}
assert((BitOffset % NumEltBits) == 0 && "Illegal bit-offset");
BroadcastIdx = BitOffset / NumEltBits;
// Do we need to bitcast the source to retrieve the original broadcast index?
bool BitCastSrc = V.getScalarValueSizeInBits() != NumEltBits;
// Check if this is a broadcast of a scalar. We special case lowering
// for scalars so that we can more effectively fold with loads.
// If the original value has a larger element type than the shuffle, the
// broadcast element is in essence truncated. Make that explicit to ease
// folding.
if (BitCastSrc && VT.isInteger())
if (SDValue TruncBroadcast = lowerShuffleAsTruncBroadcast(
DL, VT, V, BroadcastIdx, Subtarget, DAG))
return TruncBroadcast;
// Also check the simpler case, where we can directly reuse the scalar.
if (!BitCastSrc &&
((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0))) {
V = V.getOperand(BroadcastIdx);
// If we can't broadcast from a register, check that the input is a load.
if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
return SDValue();
} else if (ISD::isNormalLoad(V.getNode()) &&
cast<LoadSDNode>(V)->isSimple()) {
// We do not check for one-use of the vector load because a broadcast load
// is expected to be a win for code size, register pressure, and possibly
// uops even if the original vector load is not eliminated.
// Reduce the vector load and shuffle to a broadcasted scalar load.
LoadSDNode *Ld = cast<LoadSDNode>(V);
SDValue BaseAddr = Ld->getOperand(1);
MVT SVT = VT.getScalarType();
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
SDValue NewAddr =
DAG.getMemBasePlusOffset(BaseAddr, TypeSize::Fixed(Offset), DL);
// Directly form VBROADCAST_LOAD if we're using VBROADCAST opcode rather
// than MOVDDUP.
// FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX?
if (Opcode == X86ISD::VBROADCAST) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {Ld->getChain(), NewAddr};
V = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
DAG.makeEquivalentMemoryOrdering(Ld, V);
return DAG.getBitcast(VT, V);
}
assert(SVT == MVT::f64 && "Unexpected VT!");
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
DAG.makeEquivalentMemoryOrdering(Ld, V);
} else if (!BroadcastFromReg) {
// We can't broadcast from a vector register.
return SDValue();
} else if (BitOffset != 0) {
// We can only broadcast from the zero-element of a vector register,
// but it can be advantageous to broadcast from the zero-element of a
// subvector.
if (!VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
// VPERMQ/VPERMPD can perform the cross-lane shuffle directly.
if (VT == MVT::v4f64 || VT == MVT::v4i64)
return SDValue();
// Only broadcast the zero-element of a 128-bit subvector.
if ((BitOffset % 128) != 0)
return SDValue();
assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
"Unexpected bit-offset");
assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) &&
"Unexpected vector size");
unsigned ExtractIdx = BitOffset / V.getScalarValueSizeInBits();
V = extract128BitVector(V, ExtractIdx, DAG, DL);
}
// On AVX we can use VBROADCAST directly for scalar sources.
if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector()) {
V = DAG.getBitcast(MVT::f64, V);
if (Subtarget.hasAVX()) {
V = DAG.getNode(X86ISD::VBROADCAST, DL, MVT::v2f64, V);
return DAG.getBitcast(VT, V);
}
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V);
}
// If this is a scalar, do the broadcast on this type and bitcast.
if (!V.getValueType().isVector()) {
assert(V.getScalarValueSizeInBits() == NumEltBits &&
"Unexpected scalar size");
MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(),
VT.getVectorNumElements());
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
// We only support broadcasting from 128-bit vectors to minimize the
// number of patterns we need to deal with in isel. So extract down to
// 128-bits, removing as many bitcasts as possible.
if (V.getValueSizeInBits() > 128)
V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
// Otherwise cast V to a vector with the same element type as VT, but
// possibly narrower than VT. Then perform the broadcast.
unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts);
return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V));
}
// Check for whether we can use INSERTPS to perform the shuffle. We only use
// INSERTPS when the V1 elements are already in the correct locations
// because otherwise we can always just use two SHUFPS instructions, which
// are much smaller to encode than a SHUFPS and an INSERTPS. We can also
// perform INSERTPS if a single V1 element is out of place and all V2
// elements are zeroable.
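// Note on the immediate built below: bits [7:6] select the V2 source
// element, bits [5:4] select the destination slot, and bits [3:0] are the
// zero mask, matching the INSERTPS instruction encoding.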
static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2,
unsigned &InsertPSMask,
const APInt &Zeroable,
ArrayRef<int> Mask, SelectionDAG &DAG) {
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
// Attempt to match INSERTPS with one element from VA or VB being
// inserted into VA (or undef). If successful, V1, V2 and InsertPSMask
// are updated.
auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
ArrayRef<int> CandidateMask) {
unsigned ZMask = 0;
int VADstIndex = -1;
int VBDstIndex = -1;
bool VAUsedInPlace = false;
for (int i = 0; i < 4; ++i) {
// Synthesize a zero mask from the zeroable elements (includes undefs).
if (Zeroable[i]) {
ZMask |= 1 << i;
continue;
}
// Flag if we use any VA inputs in place.
if (i == CandidateMask[i]) {
VAUsedInPlace = true;
continue;
}
// We can only insert a single non-zeroable element.
if (VADstIndex >= 0 || VBDstIndex >= 0)
return false;
if (CandidateMask[i] < 4) {
// VA input out of place for insertion.
VADstIndex = i;
} else {
// VB input for insertion.
VBDstIndex = i;
}
}
// Don't bother if we have no (non-zeroable) element for insertion.
if (VADstIndex < 0 && VBDstIndex < 0)
return false;
// Determine element insertion src/dst indices. The src index is from the
// start of the inserted vector, not the start of the concatenated vector.
unsigned VBSrcIndex = 0;
if (VADstIndex >= 0) {
// If we have a VA input out of place, we use VA as the V2 element
// insertion and don't use the original V2 at all.
VBSrcIndex = CandidateMask[VADstIndex];
VBDstIndex = VADstIndex;
VB = VA;
} else {
VBSrcIndex = CandidateMask[VBDstIndex] - 4;
}
// If no V1 inputs are used in place, then the result is created only from
// the zero mask and the V2 insertion - so remove V1 dependency.
if (!VAUsedInPlace)
VA = DAG.getUNDEF(MVT::v4f32);
// Update V1, V2 and InsertPSMask accordingly.
V1 = VA;
V2 = VB;
// Insert the V2 element into the desired position.
InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
return true;
};
if (matchAsInsertPS(V1, V2, Mask))
return true;
// Commute and try again.
SmallVector<int, 4> CommutedMask(Mask);
ShuffleVectorSDNode::commuteMask(CommutedMask);
if (matchAsInsertPS(V2, V1, CommutedMask))
return true;
return false;
}
static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
ArrayRef<int> Mask, const APInt &Zeroable,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
// Attempt to match the insertps pattern.
unsigned InsertPSMask = 0;
if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
return SDValue();
// Insert the V2 element into the desired position.
return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
/// Handle lowering of 2-lane 64-bit floating point shuffles.
///
/// This is the basis function for the 2-lane 64-bit shuffles as we have full
/// support for floating point shuffles but not integer shuffles. These
/// instructions will incur a domain crossing penalty on some chips though so
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2f64, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. Simulate this by using the
// single input as both of the "inputs" to this instruction.
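// For example, Mask = {1, 1} gives an immediate of 3 below, taking the
// high element of V1 for both result elements.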
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
if (Subtarget.hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
return DAG.getNode(
X86ISD::SHUFP, DL, MVT::v2f64,
Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[0] < 2 && "We sort V1 to be the first input.");
assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
if (Subtarget.hasAVX2())
if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
return Extract;
// When loading a scalar and then shuffling it into a vector we can often do
// the insertion cheaply.
if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
// Try to use one of the special instruction patterns to handle two common
// blend patterns if a zero-blend above didn't work.
if (isShuffleEquivalent(Mask, {0, 3}, V1, V2) ||
isShuffleEquivalent(Mask, {1, 3}, V1, V2))
if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
// We can either use a special instruction to load over the low double or
// to move just the low double.
return DAG.getNode(
X86ISD::MOVSD, DL, MVT::v2f64, V2,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
return V;
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
/// Handle lowering of 2-lane 64-bit integer shuffles.
///
/// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
/// the integer unit to minimize domain crossing penalties. However, for blends
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2i64, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
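// For example, the v2i64 mask {1, 0} widens to the v4i32 mask {2, 3, 0, 1}.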
V1 = DAG.getBitcast(MVT::v4i32, V1);
int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2),
Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1),
Mask[1] < 0 ? -1 : (Mask[1] * 2),
Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)};
return DAG.getBitcast(
MVT::v2i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG)));
}
assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[0] < 2 && "We sort V1 to be the first input.");
assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
if (Subtarget.hasAVX2())
if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
return Extract;
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// When loading a scalar and then shuffling it into a vector we can often do
// the insertion cheaply.
if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3()) {
if (Subtarget.hasVLX())
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
}
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG);
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
// have this problem. It would be really nice if x86 had better shuffles here.
V1 = DAG.getBitcast(MVT::v2f64, V1);
V2 = DAG.getBitcast(MVT::v2f64, V2);
return DAG.getBitcast(MVT::v2i64,
DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
/// Lower a vector shuffle using the SHUFPS instruction.
///
/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
/// It makes no assumptions about whether this is the *best* lowering, it simply
/// uses it.
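/// For example, Mask = {0, 1, 2, 7} has a single V2 element at index 3 whose
/// adjacent slot holds a V1 element, so V2 is first blended with V1 to form
/// the paired dword and a final SHUFPS then places both halves.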
static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
SmallVector<int, 4> NewMask(Mask);
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 1) {
int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin();
// Compute the index adjacent to V2Index and in the same half by toggling
// the low bit.
int V2AdjIndex = V2Index ^ 1;
if (Mask[V2AdjIndex] < 0) {
// Handles all the cases where we have a single V2 element and an undef.
// This will only ever happen in the high lanes because we commute the
// vector otherwise.
if (V2Index < 2)
std::swap(LowV, HighV);
NewMask[V2Index] -= 4;
} else {
// Handle the case where the V2 element ends up adjacent to a V1 element.
// To make this work, blend them together as the first step.
int V1Index = V2AdjIndex;
int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));
// Now proceed to reconstruct the final blend as we have the necessary
// high or low half formed.
if (V2Index < 2) {
LowV = V2;
HighV = V1;
} else {
HighV = V2;
}
NewMask[V1Index] = 2; // We put the V1 element in V2[2].
NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
}
} else if (NumV2Elements == 2) {
if (Mask[0] < 4 && Mask[1] < 4) {
// Handle the easy case where we have V1 in the low lanes and V2 in the
// high lanes.
NewMask[2] -= 4;
NewMask[3] -= 4;
} else if (Mask[2] < 4 && Mask[3] < 4) {
// We also handle the reversed case because this utility may get called
// when we detect a SHUFPS pattern but can't easily commute the shuffle to
// arrange things in the right direction.
NewMask[0] -= 4;
NewMask[1] -= 4;
HighV = V1;
LowV = V2;
} else {
// We have a mixture of V1 and V2 in both low and high lanes. Rather than
// trying to place elements directly, just blend them and set up the final
// shuffle to place them.
// The first two blend mask elements are for V1, the second two are for
// V2.
int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
Mask[2] < 4 ? Mask[2] : Mask[3],
(Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
(Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));
// Now we do a normal shuffle of V1 by giving V1 as both operands to
// a blend.
LowV = HighV = V1;
NewMask[0] = Mask[0] < 4 ? 0 : 2;
NewMask[1] = Mask[0] < 4 ? 2 : 0;
NewMask[2] = Mask[2] < 4 ? 1 : 3;
NewMask[3] = Mask[2] < 4 ? 3 : 1;
}
} else if (NumV2Elements == 3) {
// Ideally canonicalizeShuffleMaskWithCommute should have caught this, but
// we can get here via other paths (e.g. repeated mask matching) where we
// don't want to do another round of lowerVECTOR_SHUFFLE.
ShuffleVectorSDNode::commuteMask(NewMask);
return lowerShuffleWithSHUFPS(DL, VT, NewMask, V2, V1, DAG);
}
return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
}
/// Lower 4-lane 32-bit floating point shuffles.
///
/// Uses instructions exclusively from the floating point unit to minimize
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f32, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// Use even/odd duplicate instructions for masks that match their pattern.
if (Subtarget.hasSSE3()) {
if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
if (isShuffleEquivalent(Mask, {1, 1, 3, 3}, V1, V2))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
}
if (Subtarget.hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Use MOVLHPS/MOVHLPS to simulate unary shuffles. These are only valid
// in SSE1 because otherwise they are widened to v2f64 and never get here.
if (!Subtarget.hasSSE2()) {
if (isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2))
return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1);
if (isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1, V2))
return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1);
}
// Otherwise, use a straight shuffle of a single input vector. We pass the
// input vector to both operands to simulate this with a SHUFPS.
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
if (Subtarget.hasAVX2())
if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
return Extract;
// There are special ways we can lower some single-element blends. However, we
// have custom ways to lower more complex single-element blends below, which we
// defer to if both this and BLENDPS fail to match. So restrict this to the
// case where the V2 input targets element 0 of the mask -- that is the fast
// case here.
if (NumV2Elements == 1 && Mask[0] >= 4)
if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (Subtarget.hasSSE41()) {
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use INSERTPS if we can complete the shuffle efficiently.
if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
return V;
if (!isSingleSHUFPSMask(Mask))
if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1,
V2, Mask, DAG))
return BlendPerm;
}
// Use low/high mov instructions. These are only valid in SSE1 because
// otherwise they are widened to v2f64 and never get here.
if (!Subtarget.hasSSE2()) {
if (isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2))
return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
if (isShuffleEquivalent(Mask, {2, 3, 6, 7}, V1, V2))
return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
}
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
return V;
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
/// Lower 4-lane i32 vector shuffles.
///
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
// Try to use broadcast unless the mask only has one non-undef element.
if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) {
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
}
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We coerce the shuffle pattern to be compatible with UNPCK instructions
// but we aren't actually going to use the UNPCK instruction because doing
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
Mask = UnpackLoMask;
else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
Mask = UnpackHiMask;
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
if (Subtarget.hasAVX2())
if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
return Extract;
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3()) {
if (Subtarget.hasVLX())
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
}
// Assume that a single SHUFPS is faster than an alternative sequence of
// multiple instructions (even if the CPU has a domain penalty).
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (!isSingleSHUFPSMask(Mask)) {
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2,
Mask, Subtarget, DAG))
return Unpack;
}
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would incur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.
SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2);
SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask);
return DAG.getBitcast(MVT::v4i32, ShufPS);
}
/// Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
/// shuffle lowering, and the most complex part.
///
/// The lowering strategy is to try to form pairs of input lanes which are
/// targeted at the same half of the final vector, and then use a dword shuffle
/// to place them onto the right half, and finally unpack the paired lanes into
/// their final position.
///
/// The exact breakdown of how to form these dword pairs and align them on the
/// correct sides is really tricky. See the comments within the function for
/// more of the details.
///
/// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
/// vector, form the analogous 128-bit 8-element Mask.
static SDValue lowerV8I16GeneralSingleInputShuffle(
const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
// Attempt to directly match PSHUFLW or PSHUFHW.
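// For example, Mask = {3, 2, 1, 0, 4, 5, 6, 7} matches PSHUFLW directly
// (immediate {3, 2, 1, 0}), and Mask = {0, 1, 2, 3, 7, 6, 5, 4} matches
// PSHUFHW.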
if (isUndefOrInRange(LoMask, 0, 4) &&
isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
}
if (isUndefOrInRange(HiMask, 4, 8) &&
isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
for (int i = 0; i != 4; ++i)
HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));
return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
}
SmallVector<int, 4> LoInputs;
copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
array_pod_sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
SmallVector<int, 4> HiInputs;
copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
array_pod_sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
int NumLToL = llvm::lower_bound(LoInputs, 4) - LoInputs.begin();
int NumHToL = LoInputs.size() - NumLToL;
int NumLToH = llvm::lower_bound(HiInputs, 4) - HiInputs.begin();
int NumHToH = HiInputs.size() - NumLToH;
MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
// If we are shuffling values from one half, check how many different DWORD
// pairs we need to create. If only 1 or 2, then we can perform this as a
// PSHUFLW/PSHUFHW + PSHUFD instead of the PSHUFD+PSHUFLW+PSHUFHW chain below.
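// For example, Mask = {0, 1, 2, 3, 0, 1, 2, 3} uses just the DWORD pairs
// (0,1) and (2,3), so it lowers as PSHUFLW {0, 1, 2, 3} (a no-op) followed
// by PSHUFD {0, 1, 0, 1}.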
auto ShuffleDWordPairs = [&](ArrayRef<int> PSHUFHalfMask,
ArrayRef<int> PSHUFDMask, unsigned ShufWOp) {
V = DAG.getNode(ShufWOp, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
V = DAG.getBitcast(PSHUFDVT, V);
V = DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, V,
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
return DAG.getBitcast(VT, V);
};
if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
int PSHUFDMask[4] = { -1, -1, -1, -1 };
SmallVector<std::pair<int, int>, 4> DWordPairs;
int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);
// Collect the different DWORD pairs.
for (int DWord = 0; DWord != 4; ++DWord) {
int M0 = Mask[2 * DWord + 0];
int M1 = Mask[2 * DWord + 1];
M0 = (M0 >= 0 ? M0 % 4 : M0);
M1 = (M1 >= 0 ? M1 % 4 : M1);
if (M0 < 0 && M1 < 0)
continue;
bool Match = false;
for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
auto &DWordPair = DWordPairs[j];
if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) &&
(M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) {
DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first);
DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second);
PSHUFDMask[DWord] = DOffset + j;
Match = true;
break;
}
}
if (!Match) {
PSHUFDMask[DWord] = DOffset + DWordPairs.size();
DWordPairs.push_back(std::make_pair(M0, M1));
}
}
if (DWordPairs.size() <= 2) {
DWordPairs.resize(2, std::make_pair(-1, -1));
int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
DWordPairs[1].first, DWordPairs[1].second};
if ((NumHToL + NumHToH) == 0)
return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
if ((NumLToL + NumLToH) == 0)
return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW);
}
}
// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
// with <=2 inputs from each half to each half. Once there, we can fall through
// to the generic code below. For example:
//
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
//
// However in some very rare cases we have a 1-into-3 or 3-into-1 on one half
// and an existing 2-into-2 on the other half. In this case we may have to
// pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or
// 1-into-3 which could cause us to cycle endlessly fixing each side in turn.
// Fortunately, we don't have to handle anything but a 2-into-2 pattern
// because any other situation (including a 3-into-1 or 1-into-3 in a half
// other than the one we target for fixing) will be fixed when we re-enter this
// path. We will also combine away any resulting sequence of PSHUFD
// instructions, folding it into a single instruction. Here is an example of
// the tricky case:
//
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3]
//
// This now has a 1-into-3 in the high half! Instead, we do two shuffles:
//
// Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6]
//
// Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6]
//
// The result is fine to be handled by the generic logic.
auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs,
ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs,
int AOffset, int BOffset) {
assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
"Must call this with A having 3 or 1 inputs from the A half.");
assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
"Must call this with B having 1 or 3 inputs from the B half.");
assert(AToAInputs.size() + BToAInputs.size() == 4 &&
"Must call this with either 3:1 or 1:3 inputs (summing to 4).");
bool ThreeAInputs = AToAInputs.size() == 3;
// Compute the index of dword with only one word among the three inputs in
// a half by taking the sum of the half with three inputs and subtracting
// the sum of the actual three inputs. The difference is the remaining
// slot.
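// For example, if the three low-half inputs are {0, 1, 3}, the full sum
// 0+1+2+3 = 6 minus the input sum 4 leaves word 2, so dword 1 holds the
// lone non-input slot.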
int ADWord = 0, BDWord = 0;
int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
int TripleNonInputIdx =
TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
TripleDWord = TripleNonInputIdx / 2;
// We use xor with one to compute the adjacent DWord to whichever one the
// OneInput is in.
OneInputDWord = (OneInput / 2) ^ 1;
// Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA
// and BToA inputs. If there is also such a problem with the BToB and AToB
// inputs, we don't try to fix it necessarily -- we'll recurse and see it in
// the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it
// is essential that we don't *create* a 3<-1 as then we might oscillate.
if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
// Compute how many inputs will be flipped by swapping these DWords. We
// need to balance this to ensure we don't form a 3-1 shuffle in the other
// half.
int NumFlippedAToBInputs = llvm::count(AToBInputs, 2 * ADWord) +
llvm::count(AToBInputs, 2 * ADWord + 1);
int NumFlippedBToBInputs = llvm::count(BToBInputs, 2 * BDWord) +
llvm::count(BToBInputs, 2 * BDWord + 1);
if ((NumFlippedAToBInputs == 1 &&
(NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
(NumFlippedBToBInputs == 1 &&
(NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
// We choose whether to fix the A half or B half based on whether that
// half has zero flipped inputs. At zero, we may not be able to fix it
// with that half. We also bias towards fixing the B half because that
// will more commonly be the high half, and we have to bias one way.
auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
ArrayRef<int> Inputs) {
int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned slot.
bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1);
// Determine whether the free index is in the flipped dword or the
// unflipped dword based on where the pinned index is. We use this bit
// in an xor to conditionally select the adjacent dword.
int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
if (IsFixIdxInput == IsFixFreeIdxInput)
FixFreeIdx += 1;
IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
assert(IsFixIdxInput != IsFixFreeIdxInput &&
"We need to be changing the number of flipped inputs!");
int PSHUFHalfMask[] = {0, 1, 2, 3};
std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
V = DAG.getNode(
FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
for (int &M : Mask)
if (M >= 0 && M == FixIdx)
M = FixFreeIdx;
else if (M >= 0 && M == FixFreeIdx)
M = FixIdx;
};
if (NumFlippedBToBInputs != 0) {
int BPinnedIdx =
BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
} else {
assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
}
}
}
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
V = DAG.getBitcast(
VT,
DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
if (M >= 0 && M/2 == ADWord)
M = 2 * BDWord + M % 2;
else if (M >= 0 && M/2 == BDWord)
M = 2 * ADWord + M % 2;
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
// At this point there are at most two inputs to the low and high halves from
// each half. That means the inputs can always be grouped into dwords and
// those dwords can then be moved to the correct half with a dword shuffle.
// We use at most one low and one high word shuffle to collect these paired
// inputs into dwords, and finally a dword shuffle to place them.
int PSHUFLMask[4] = {-1, -1, -1, -1};
int PSHUFHMask[4] = {-1, -1, -1, -1};
int PSHUFDMask[4] = {-1, -1, -1, -1};
// First fix the masks for all the inputs that are staying in their
// original halves. This will then dictate the targets of the cross-half
// shuffles.
auto fixInPlaceInputs =
[&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
MutableArrayRef<int> SourceHalfMask,
MutableArrayRef<int> HalfMask, int HalfOffset) {
if (InPlaceInputs.empty())
return;
if (InPlaceInputs.size() == 1) {
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
InPlaceInputs[0] - HalfOffset;
PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
return;
}
if (IncomingInputs.empty()) {
// Just fix all of the in place inputs.
for (int Input : InPlaceInputs) {
SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
PSHUFDMask[Input / 2] = Input / 2;
}
return;
}
assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
InPlaceInputs[0] - HalfOffset;
// Put the second input next to the first so that they are packed into
// a dword. We find the adjacent index by toggling the low bit.
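// For example, in-place inputs {1, 2} keep word 1 in slot 1, move word 2
// into the adjacent slot 0, and pin dword 0 in the PSHUFD mask.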
int AdjIndex = InPlaceInputs[0] ^ 1;
SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
};
fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
// Now gather the cross-half inputs and place them into a free dword of
// their target half.
// FIXME: This operation could almost certainly be simplified dramatically to
// look more like the 3-1 fixing operation.
auto moveInputsToRightHalf = [&PSHUFDMask](
MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
int DestOffset) {
auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
};
auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
int Word) {
int LowWord = Word & ~1;
int HighWord = Word | 1;
return isWordClobbered(SourceHalfMask, LowWord) ||
isWordClobbered(SourceHalfMask, HighWord);
};
if (IncomingInputs.empty())
return;
if (ExistingInputs.empty()) {
// Map any dwords with inputs from them into the right half.
for (int Input : IncomingInputs) {
// If the source half mask maps over the inputs, turn those into
// swaps and use the swapped lane.
if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) {
SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
Input - SourceOffset;
// We have to swap the uses in our half mask in one sweep.
for (int &M : HalfMask)
if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
M = Input;
else if (M == Input)
M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
} else {
assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
Input - SourceOffset &&
"Previous placement doesn't match!");
}
// Note that this correctly re-maps both when we do a swap and when
// we observe the other side of the swap above. We rely on that to
// avoid swapping the members of the input list directly.
Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
}
// Map the input's dword into the correct half.
if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0)
PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
else
assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
Input / 2 &&
"Previous placement doesn't match!");
}
// And just directly shift any other-half mask elements to be same-half
// as we will have mirrored the dword containing the element into the
// same position within that half.
for (int &M : HalfMask)
if (M >= SourceOffset && M < SourceOffset + 4) {
M = M - SourceOffset + DestOffset;
assert(M >= 0 && "This should never wrap below zero!");
}
return;
}
// Ensure we have the input in a viable dword of its current half. This
// is particularly tricky because the original position may be clobbered
// by inputs being moved and *staying* in that half.
if (IncomingInputs.size() == 1) {
if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) +
SourceOffset;
SourceHalfMask[InputFixed - SourceOffset] =
IncomingInputs[0] - SourceOffset;
std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
InputFixed);
IncomingInputs[0] = InputFixed;
}
} else if (IncomingInputs.size() == 2) {
if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
// We have two non-adjacent or clobbered inputs we need to extract from
// the source half. To do this, we need to map them into some adjacent
// dword slot in the source mask.
int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
IncomingInputs[1] - SourceOffset};
// If there is a free slot in the source half mask adjacent to one of
// the inputs, place the other input in it. We use (Index XOR 1) to
// compute an adjacent index.
if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
SourceHalfMask[InputsFixed[0] ^ 1] < 0) {
SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
InputsFixed[1] = InputsFixed[0] ^ 1;
} else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
SourceHalfMask[InputsFixed[1] ^ 1] < 0) {
SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
InputsFixed[0] = InputsFixed[1] ^ 1;
} else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 &&
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) {
// The two inputs are in the same DWord but it is clobbered and the
// adjacent DWord isn't used at all. Move both inputs to the free
// slot.
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
} else {
// The only way we hit this point is if there is no clobbering
// (because there are no off-half inputs to this half) and there is no
// free slot adjacent to one of the inputs. In this case, we have to
// swap an input with a non-input.
for (int i = 0; i < 4; ++i)
assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) &&
"We can't handle any clobbers here!");
assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
"Cannot have adjacent inputs here!");
SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;
// We also have to update the final source mask in this case because
// it may need to undo the above swap.
for (int &M : FinalSourceHalfMask)
if (M == (InputsFixed[0] ^ 1) + SourceOffset)
M = InputsFixed[1] + SourceOffset;
else if (M == InputsFixed[1] + SourceOffset)
M = (InputsFixed[0] ^ 1) + SourceOffset;
InputsFixed[1] = InputsFixed[0] ^ 1;
}
// Point everything at the fixed inputs.
for (int &M : HalfMask)
if (M == IncomingInputs[0])
M = InputsFixed[0] + SourceOffset;
else if (M == IncomingInputs[1])
M = InputsFixed[1] + SourceOffset;
IncomingInputs[0] = InputsFixed[0] + SourceOffset;
IncomingInputs[1] = InputsFixed[1] + SourceOffset;
}
} else {
llvm_unreachable("Unhandled input size!");
}
// Now hoist the DWord down to the right half.
int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2;
assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free");
PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
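// Remap the half mask entries that referenced the incoming inputs to the
// words of the dword we just placed.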
for (int &M : HalfMask)
for (int Input : IncomingInputs)
if (M == Input)
M = FreeDWord * 2 + Input % 2;
};
moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
/*SourceOffset*/ 4, /*DestOffset*/ 0);
moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
/*SourceOffset*/ 0, /*DestOffset*/ 4);
// Now enact all the shuffles we've computed to move the inputs into their
// target half.
if (!isNoopShuffleMask(PSHUFLMask))
V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFHMask))
V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
V = DAG.getBitcast(
VT,
DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// At this point, each half should contain all its inputs, and we can then
// just shuffle them into their final position.
assert(count_if(LoMask, [](int M) { return M >= 4; }) == 0 &&
"Failed to lift all the high half inputs to the low mask!");
assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 &&
"Failed to lift all the low half inputs to the high mask!");
// Do a half shuffle for the low mask.
if (!isNoopShuffleMask(LoMask))
V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
// Do a half shuffle with the high mask after shifting its values down.
for (int &M : HiMask)
if (M >= 0)
M -= 4;
if (!isNoopShuffleMask(HiMask))
V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
return V;
}
/// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
/// blend if only one input is used.
static SDValue lowerShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
"Lane crossing shuffle masks not supported");
int NumBytes = VT.getSizeInBits() / 8;
int Size = Mask.size();
int Scale = NumBytes / Size;
SmallVector<SDValue, 64> V1Mask(NumBytes, DAG.getUNDEF(MVT::i8));
SmallVector<SDValue, 64> V2Mask(NumBytes, DAG.getUNDEF(MVT::i8));
V1InUse = false;
V2InUse = false;
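// For each destination byte, select the corresponding source byte from V1
// or V2, or use 0x80 to zero it (also used for zeroable elements).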
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Scale];
if (M < 0)
continue;
const int ZeroMask = 0x80;
int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
if (Zeroable[i / Scale])
V1Idx = V2Idx = ZeroMask;
V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
V1InUse |= (ZeroMask != V1Idx);
V2InUse |= (ZeroMask != V2Idx);
}
MVT ShufVT = MVT::getVectorVT(MVT::i8, NumBytes);
if (V1InUse)
V1 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V1),
DAG.getBuildVector(ShufVT, DL, V1Mask));
if (V2InUse)
V2 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V2),
DAG.getBuildVector(ShufVT, DL, V2Mask));
// If we need shuffled inputs from both, blend the two.
SDValue V;
if (V1InUse && V2InUse)
V = DAG.getNode(ISD::OR, DL, ShufVT, V1, V2);
else
V = V1InUse ? V1 : V2;
// Cast the result back to the correct type.
return DAG.getBitcast(VT, V);
}
/// Generic lowering of 8-lane i16 shuffles.
///
/// This handles both single-input shuffles and combined shuffle/blends with
/// two inputs. The single input shuffles are immediately delegated to
/// a dedicated lowering routine.
///
/// The blends are lowered in one of three fundamental ways. If there are few
/// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle
/// of the input is significantly cheaper when lowered as an interleaving of
/// the two inputs, try to interleave them. Otherwise, blend the low and high
/// halves of the inputs separately (making them have relatively few inputs)
/// and then concatenate them.
static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return ZExt;
// Try to lower using a truncation.
if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
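// With no inputs from V2 this is a single-input shuffle of V1, so try the
// cheaper unary lowerings first.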
if (NumV2Inputs == 0) {
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// Try to use bit rotation instructions.
if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask,
Subtarget, DAG))
return Rotate;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
Subtarget))
return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask,
Subtarget, DAG))
return Rotate;
// Make a copy of the mask so it can be modified.
SmallVector<int, 8> MutableMask(Mask);
return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v8i16, V1, MutableMask,
Subtarget, DAG);
}
assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) &&
"All single-input shuffles should be canonicalized to be V1-input "
"shuffles.");
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, DAG))
return V;
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
Subtarget))
return V;
// Try to lower using a truncation.
if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
if (SDValue BitBlend =
lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
return BitBlend;
// Try to use byte shift instructions to mask.
if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
// Attempt to lower using compaction; SSE41 is necessary for PACKUSDW.
// We could use SIGN_EXTEND_INREG+PACKSSDW for older targets but this seems to
// be slower than a PSHUFLW+PSHUFHW+PSHUFD chain.
int NumEvenDrops = canLowerByDroppingElements(Mask, true, false);
if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() &&
!Subtarget.hasVLX()) {
// Check if this is part of a 256-bit vector truncation.
if (NumEvenDrops == 2 && Subtarget.hasAVX2() &&
peekThroughBitcasts(V1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
peekThroughBitcasts(V2).getOpcode() == ISD::EXTRACT_SUBVECTOR) {
SDValue V1V2 = concatSubVectors(V1, V2, DAG, DL);
V1V2 = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1V2,
getZeroVector(MVT::v16i16, Subtarget, DAG, DL),
DAG.getTargetConstant(0xEE, DL, MVT::i8));
V1V2 = DAG.getBitcast(MVT::v8i32, V1V2);
V1 = extract128BitVector(V1V2, 0, DAG, DL);
V2 = extract128BitVector(V1V2, 4, DAG, DL);
} else {
SmallVector<SDValue, 4> DWordClearOps(4,
DAG.getConstant(0, DL, MVT::i32));
for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
SDValue DWordClearMask =
DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps);
V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1),
DWordClearMask);
V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
DWordClearMask);
}
// Now pack things back together.
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2);
if (NumEvenDrops == 2) {
Result = DAG.getBitcast(MVT::v4i32, Result);
Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, Result, Result);
}
return Result;
}
// When compacting odd (upper) elements, use PACKSS pre-SSE41.
int NumOddDrops = canLowerByDroppingElements(Mask, false, false);
if (NumOddDrops == 1) {
bool HasSSE41 = Subtarget.hasSSE41();
V1 = DAG.getNode(HasSSE41 ? X86ISD::VSRLI : X86ISD::VSRAI, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, V1),
DAG.getTargetConstant(16, DL, MVT::i8));
V2 = DAG.getNode(HasSSE41 ? X86ISD::VSRLI : X86ISD::VSRAI, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, V2),
DAG.getTargetConstant(16, DL, MVT::i8));
return DAG.getNode(HasSSE41 ? X86ISD::PACKUS : X86ISD::PACKSS, DL,
MVT::v8i16, V1, V2);
}
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
Mask, Subtarget, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
// can both shuffle and set up the inefficient blend.
if (!IsBlendSupported && Subtarget.hasSSSE3()) {
bool V1InUse, V2InUse;
return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, DAG, V1InUse, V2InUse);
}
// We can always bit-blend if we have to, so the fallback strategy is to
// decompose into single-input permutes and blends/unpacks.
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i16, V1, V2,
Mask, Subtarget, DAG);
}
/// Lower 8-lane 16-bit floating point shuffles.
static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
if (Subtarget.hasFP16()) {
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f16, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
}
if (NumV2Elements == 1 && Mask[0] >= 8)
if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v8f16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
}
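// Lower the remaining cases by bitcasting to v8i16 and reusing the integer
// shuffle lowering.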
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = DAG.getBitcast(MVT::v8i16, V2);
return DAG.getBitcast(MVT::v8f16,
DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, Mask));
}
// Lowers a unary/binary shuffle as VPERMV/VPERMV3. For non-VLX targets,
// sub-512-bit shuffles are padded to 512 bits for the shuffle and then
// the active subvector is extracted.
static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT MaskVT = VT.changeTypeToInteger();
SDValue MaskNode;
MVT ShuffleVT = VT;
if (!VT.is512BitVector() && !Subtarget.hasVLX()) {
V1 = widenSubVector(V1, false, Subtarget, DAG, DL, 512);
V2 = widenSubVector(V2, false, Subtarget, DAG, DL, 512);
ShuffleVT = V1.getSimpleValueType();
// Adjust mask to correct indices for the second input.
int NumElts = VT.getVectorNumElements();
unsigned Scale = 512 / VT.getSizeInBits();
SmallVector<int, 32> AdjustedMask(Mask);
for (int &M : AdjustedMask)
if (NumElts <= M)
M += (Scale - 1) * NumElts;
MaskNode = getConstVector(AdjustedMask, MaskVT, DAG, DL, true);
MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512);
} else {
MaskNode = getConstVector(Mask, MaskVT, DAG, DL, true);
}
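// Single-input shuffles use VPERMV; two-input shuffles use VPERMV3 with the
// mask as the middle operand.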
SDValue Result;
if (V2.isUndef())
Result = DAG.getNode(X86ISD::VPERMV, DL, ShuffleVT, MaskNode, V1);
else
Result = DAG.getNode(X86ISD::VPERMV3, DL, ShuffleVT, V1, MaskNode, V2);
if (VT != ShuffleVT)
Result = extractSubVector(Result, 0, DAG, DL, VT.getSizeInBits());
return Result;
}
/// Generic lowering of v16i8 shuffles.
///
/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to
/// detect any complexity-reducing interleaving. If that doesn't help, it uses
/// UNPCK to spread the i8 elements across two i16-element vectors, and uses
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
/// back together.
static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Use dedicated pack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG,
Subtarget))
return V;
// Try to use a zext lowering.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return ZExt;
// Try to lower using a truncation.
if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, DAG))
return V;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
// For single-input shuffles, there are some nicer lowering tricks we can use.
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i8, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// Try to use bit rotation instructions.
if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask,
Subtarget, DAG))
return Rotate;
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
return V;
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
// Notably, this handles splat and partial-splat shuffles more efficiently.
// However, it only makes sense if the pre-duplication shuffle simplifies
// things significantly. Currently, this means we need to be able to
// express the pre-duplication shuffle as an i16 shuffle.
//
// FIXME: We should check for other patterns which can be widened into an
// i16 shuffle as well.
auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
for (int i = 0; i < 16; i += 2)
if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1])
return false;
return true;
};
auto tryToWidenViaDuplication = [&]() -> SDValue {
if (!canWidenViaDuplication(Mask))
return SDValue();
SmallVector<int, 4> LoInputs;
copy_if(Mask, std::back_inserter(LoInputs),
[](int M) { return M >= 0 && M < 8; });
array_pod_sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
LoInputs.end());
SmallVector<int, 4> HiInputs;
copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
array_pod_sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
HiInputs.end());
bool TargetLo = LoInputs.size() >= HiInputs.size();
ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
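// Build an i16 pre-duplication shuffle: keep the in-place inputs in their
// existing words and pack the moving inputs into free words of the target
// half.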
int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
SmallDenseMap<int, int, 8> LaneMap;
for (int I : InPlaceInputs) {
PreDupI16Shuffle[I / 2] = I / 2;
LaneMap[I] = I;
}
int j = TargetLo ? 0 : 4, je = j + 4;
for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
// Check if j is already a shuffle of this input. This happens when
// there are two adjacent bytes after we move the low one.
if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
// If we haven't yet mapped the input, search for a slot into which
// we can map it.
while (j < je && PreDupI16Shuffle[j] >= 0)
++j;
if (j == je)
// We can't place the inputs into a single half with a simple i16
// shuffle, so bail.
return SDValue();
// Map this input with the i16 shuffle.
PreDupI16Shuffle[j] = MovingInputs[i] / 2;
}
// Update the lane map based on the mapping we ended up with.
LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
}
V1 = DAG.getBitcast(
MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
// Unpack the bytes to form the i16s that will be shuffled into place.
bool EvenInUse = false, OddInUse = false;
for (int i = 0; i < 16; i += 2) {
EvenInUse |= (Mask[i + 0] >= 0);
OddInUse |= (Mask[i + 1] >= 0);
if (EvenInUse && OddInUse)
break;
}
V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8),
OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8));
int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
for (int i = 0; i < 16; ++i)
if (Mask[i] >= 0) {
int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
if (PostDupI16Shuffle[i / 2] < 0)
PostDupI16Shuffle[i / 2] = MappedMask;
else
assert(PostDupI16Shuffle[i / 2] == MappedMask &&
"Conflicting entries in the original shuffle!");
}
return DAG.getBitcast(
MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
};
if (SDValue V = tryToWidenViaDuplication())
return V;
}
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
return V;
// Try to use byte shift instructions to mask.
if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
// Check for compaction patterns.
bool IsSingleInput = V2.isUndef();
int NumEvenDrops = canLowerByDroppingElements(Mask, true, IsSingleInput);
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
// blends but after all of the single-input lowerings. If the single input
// lowerings can find an instruction sequence that is faster than a PSHUFB, we
// want to preserve that and we can DAG combine any longer sequences into
// a PSHUFB in the end. But once we start blending from multiple inputs,
// the complexity of DAG combining bad patterns back into PSHUFB is too high,
// and there are *very* few patterns that would actually be faster than the
// PSHUFB approach because of its ability to zero lanes.
//
// If the mask is a binary compaction, we can more efficiently perform this
// as a PACKUS(AND(),AND()) - which is quicker than UNPACK(PSHUFB(),PSHUFB()).
//
// FIXME: The only exceptions to the above are blends which are exact
// interleavings with direct instructions supporting them. We currently don't
// handle those well here.
if (Subtarget.hasSSSE3() && (IsSingleInput || NumEvenDrops != 1)) {
bool V1InUse = false;
bool V2InUse = false;
SDValue PSHUFB = lowerShuffleAsBlendOfPSHUFBs(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);
// If both V1 and V2 are in use and we can use a direct blend or an unpack,
// do so. This avoids using them to handle blends-with-zero which is
// important as a single pshufb is significantly faster for that.
if (V1InUse && V2InUse) {
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// We can use an unpack to do the blending rather than an or in some
// cases. Even though the or may be (very slightly) more efficient, we
// prefer this lowering because there are common cases where part of
// the complexity of the shuffles goes away when we do the final blend as
// an unpack.
// FIXME: It might be worth trying to detect if the unpack-feeding
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Unpack;
// AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8).
if (Subtarget.hasVBMI())
return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, Subtarget,
DAG);
// If we have XOP we can use one VPPERM instead of multiple PSHUFBs.
if (Subtarget.hasXOP()) {
SDValue MaskNode = getConstVector(Mask, MVT::v16i8, DAG, DL, true);
return DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, V1, V2, MaskNode);
}
// Use PALIGNR+Permute if possible - permute might become PSHUFB but the
// PALIGNR will be cheaper than the second PSHUFB+OR.
if (SDValue V = lowerShuffleAsByteRotateAndPermute(
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return V;
}
return PSHUFB;
}
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
return Blend;
// Check whether a compaction lowering can be done. This handles shuffles
// which take every Nth element for some even N. See the helper function for
// details.
//
// We special case these as they can be particularly efficiently handled with
// the PACKUSWB instruction on x86 and they show up in common patterns of
// rearranging bytes to truncate wide elements.
if (NumEvenDrops) {
// NumEvenDrops is the power of two stride of the elements. Another way of
// thinking about it is that we need to drop the even elements this many
// times to get the original input.
// First we need to zero all the dropped bytes.
assert(NumEvenDrops <= 3 &&
"No support for dropping even elements more than 3 times.");
SmallVector<SDValue, 8> WordClearOps(8, DAG.getConstant(0, DL, MVT::i16));
for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1))
WordClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i16);
SDValue WordClearMask = DAG.getBuildVector(MVT::v8i16, DL, WordClearOps);
V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V1),
WordClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V2),
WordClearMask);
// Now pack things back together.
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1,
IsSingleInput ? V1 : V2);
for (int i = 1; i < NumEvenDrops; ++i) {
Result = DAG.getBitcast(MVT::v8i16, Result);
Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
}
return Result;
}
int NumOddDrops = canLowerByDroppingElements(Mask, false, IsSingleInput);
if (NumOddDrops == 1) {
V1 = DAG.getNode(X86ISD::VSRLI, DL, MVT::v8i16,
DAG.getBitcast(MVT::v8i16, V1),
DAG.getTargetConstant(8, DL, MVT::i8));
if (!IsSingleInput)
V2 = DAG.getNode(X86ISD::VSRLI, DL, MVT::v8i16,
DAG.getBitcast(MVT::v8i16, V2),
DAG.getTargetConstant(8, DL, MVT::i8));
return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1,
IsSingleInput ? V1 : V2);
}
// Handle multi-input cases by blending/unpacking single-input shuffles.
if (NumV2Elements > 0)
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v16i8, V1, V2, Mask,
Subtarget, DAG);
// The fallback path for single-input shuffles widens this into two v8i16
// vectors with unpacks, shuffles those, and then pulls them back together
// with a pack.
SDValue V = V1;
std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
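// Split the byte mask into the masks for the low and high halves of the
// result.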
for (int i = 0; i < 16; ++i)
if (Mask[i] >= 0)
(i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];
SDValue VLoHalf, VHiHalf;
// Check if any of the odd lanes in the v16i8 are used. If not, we can mask
// them out and avoid using UNPCK{L,H} to extract the elements of V as
// i16s.
if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) &&
none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) {
// Use a mask to drop the high bytes.
VLoHalf = DAG.getBitcast(MVT::v8i16, V);
VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
DAG.getConstant(0x00FF, DL, MVT::v8i16));
// This will be a single vector shuffle instead of a blend so nuke VHiHalf.
VHiHalf = DAG.getUNDEF(MVT::v8i16);
// Squash the masks to point directly into VLoHalf.
for (int &M : LoBlendMask)
if (M >= 0)
M /= 2;
for (int &M : HiBlendMask)
if (M >= 0)
M /= 2;
} else {
// Otherwise just unpack the low half of V into VLoHalf and the high half into
// VHiHalf so that we can blend them as i16s.
SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
VLoHalf = DAG.getBitcast(
MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
VHiHalf = DAG.getBitcast(
MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
}
SDValue LoV =
    DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
SDValue HiV =
    DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
/// Dispatching routine to lower various 128-bit x86 vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
switch (VT.SimpleTy) {
case MVT::v2i64:
return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v2f64:
return lowerV2F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i32:
return lowerV4I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4f32:
return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i16:
return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8f16:
return lowerV8F16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i8:
return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Unimplemented!");
}
}
/// Generic routine to split vector shuffle into half-sized shuffles.
///
/// This routine just extracts two subvectors, shuffles them independently, and
/// then concatenates them back together. This should work effectively with all
/// AVX vector shuffle types.
static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT.getSizeInBits() >= 256 &&
"Only for 256-bit or wider vector shuffles!");
assert(V1.getSimpleValueType() == VT && "Bad operand type!");
assert(V2.getSimpleValueType() == VT && "Bad operand type!");
ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);
int NumElements = VT.getVectorNumElements();
int SplitNumElements = NumElements / 2;
MVT ScalarVT = VT.getVectorElementType();
MVT SplitVT = MVT::getVectorVT(ScalarVT, SplitNumElements);
// Use splitVector/extractSubVector so that split build-vectors just build two
// narrower build vectors. This helps shuffling with splats and zeros.
auto SplitVector = [&](SDValue V) {
SDValue LoV, HiV;
std::tie(LoV, HiV) = splitVector(peekThroughBitcasts(V), DAG, DL);
return std::make_pair(DAG.getBitcast(SplitVT, LoV),
DAG.getBitcast(SplitVT, HiV));
};
SDValue LoV1, HiV1, LoV2, HiV2;
std::tie(LoV1, HiV1) = SplitVector(V1);
std::tie(LoV2, HiV2) = SplitVector(V2);
// Now create two 4-way blends of these half-width vectors.
auto HalfBlend = [&](ArrayRef<int> HalfMask) {
bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1);
SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1);
SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1);
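// Walk this half of the mask, recording which halves of V1/V2 are used and
// building the per-operand shuffle masks plus the final blend mask.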
for (int i = 0; i < SplitNumElements; ++i) {
int M = HalfMask[i];
if (M >= NumElements) {
if (M >= NumElements + SplitNumElements)
UseHiV2 = true;
else
UseLoV2 = true;
V2BlendMask[i] = M - NumElements;
BlendMask[i] = SplitNumElements + i;
} else if (M >= 0) {
if (M >= SplitNumElements)
UseHiV1 = true;
else
UseLoV1 = true;
V1BlendMask[i] = M;
BlendMask[i] = i;
}
}
// Because the lowering happens after all combining takes place, we need to
// manually combine these blend masks as much as possible so that we create
// a minimal number of high-level vector shuffle nodes.
// First try just blending the halves of V1 or V2.
if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
return DAG.getUNDEF(SplitVT);
if (!UseLoV2 && !UseHiV2)
return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
if (!UseLoV1 && !UseHiV1)
return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
SDValue V1Blend, V2Blend;
if (UseLoV1 && UseHiV1) {
V1Blend =
DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
} else {
// We only use half of V1 so map the usage down into the final blend mask.
V1Blend = UseLoV1 ? LoV1 : HiV1;
for (int i = 0; i < SplitNumElements; ++i)
if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
}
if (UseLoV2 && UseHiV2) {
V2Blend =
DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
} else {
// We only use half of V2 so map the usage down into the final blend mask.
V2Blend = UseLoV2 ? LoV2 : HiV2;
for (int i = 0; i < SplitNumElements; ++i)
if (BlendMask[i] >= SplitNumElements)
BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
}
return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
};
SDValue Lo = HalfBlend(LoMask);
SDValue Hi = HalfBlend(HiMask);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
/// Either split a vector in halves or decompose the shuffles and the
/// blend/unpack.
///
/// This is provided as a good fallback for many lowerings of non-single-input
/// shuffles with more than one 128-bit lane. In those cases, we want to select
/// between splitting the shuffle into 128-bit components and stitching those
/// back together vs. extracting the single-input shuffles and blending those
/// results.
static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
"shuffles as it could then recurse on itself.");
int Size = Mask.size();
// If this can be modeled as a broadcast of two elements followed by a blend,
// prefer that lowering. This is especially important because broadcasts can
// often fold with memory operands.
auto DoBothBroadcast = [&] {
int V1BroadcastIdx = -1, V2BroadcastIdx = -1;
for (int M : Mask)
if (M >= Size) {
if (V2BroadcastIdx < 0)
V2BroadcastIdx = M - Size;
else if (M - Size != V2BroadcastIdx)
return false;
} else if (M >= 0) {
if (V1BroadcastIdx < 0)
V1BroadcastIdx = M;
else if (M != V1BroadcastIdx)
return false;
}
return true;
};
if (DoBothBroadcast())
return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget,
DAG);
// If the inputs all stem from a single 128-bit lane of each input, then we
// split them rather than blending because the split will decompose to
// unusually few instructions.
int LaneCount = VT.getSizeInBits() / 128;
int LaneSize = Size / LaneCount;
SmallBitVector LaneInputs[2];
LaneInputs[0].resize(LaneCount, false);
LaneInputs[1].resize(LaneCount, false);
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
// Otherwise, just fall back to decomposed shuffles and a blend/unpack. This
// requires that the decomposed single-input shuffles don't end up here.
return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget,
DAG);
}
// Lower as SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
// TODO: Extend to support v8f32 (+ 512-bit shuffles).
static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT == MVT::v4f64 && "Only for v4f64 shuffles");
int LHSMask[4] = {-1, -1, -1, -1};
int RHSMask[4] = {-1, -1, -1, -1};
unsigned SHUFPMask = 0;
// As SHUFPD uses a single LHS/RHS element per lane, we can always
// perform the shuffle once the lanes have been shuffled in place.
for (int i = 0; i != 4; ++i) {
int M = Mask[i];
if (M < 0)
continue;
int LaneBase = i & ~1;
auto &LaneMask = (i & 1) ? RHSMask : LHSMask;
LaneMask[LaneBase + (M & 1)] = M;
SHUFPMask |= (M & 1) << i;
}
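// Even result elements are taken from LHS and odd ones from RHS; the SHUFPD
// immediate selects the element within each 128-bit lane.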
SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask);
SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask);
return DAG.getNode(X86ISD::SHUFP, DL, VT, LHS, RHS,
DAG.getTargetConstant(SHUFPMask, DL, MVT::i8));
}
/// Lower a vector shuffle crossing multiple 128-bit lanes as
/// a lane permutation followed by a per-lane permutation.
///
/// This is mainly for cases where we can have non-repeating permutes
/// in each lane.
///
/// TODO: This is very similar to lowerShuffleAsLanePermuteAndRepeatedMask,
/// we should investigate merging them.
static SDValue lowerShuffleAsLanePermuteAndPermute(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits() / 128;
int NumEltsPerLane = NumElts / NumLanes;
bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef();
/// Attempts to find a sublane permute with the given size
/// that gets all elements into their target lanes.
///
/// If successful, fills CrossLaneMask and InLaneMask and returns the shuffled
/// value; if unsuccessful, returns SDValue() and may overwrite InLaneMask.
auto getSublanePermute = [&](int NumSublanes) -> SDValue {
int NumSublanesPerLane = NumSublanes / NumLanes;
int NumEltsPerSublane = NumElts / NumSublanes;
SmallVector<int, 16> CrossLaneMask;
SmallVector<int, 16> InLaneMask(NumElts, SM_SentinelUndef);
// CrossLaneMask but one entry == one sublane.
SmallVector<int, 16> CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
if (M < 0)
continue;
int SrcSublane = M / NumEltsPerSublane;
int DstLane = i / NumEltsPerLane;
// We only need to get the elements into the right lane, not sublane.
// So search all sublanes that make up the destination lane.
bool Found = false;
int DstSubStart = DstLane * NumSublanesPerLane;
int DstSubEnd = DstSubStart + NumSublanesPerLane;
for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) {
if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane))
continue;
Found = true;
CrossLaneMaskLarge[DstSublane] = SrcSublane;
int DstSublaneOffset = DstSublane * NumEltsPerSublane;
InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane;
break;
}
if (!Found)
return SDValue();
}
// Fill CrossLaneMask using CrossLaneMaskLarge.
narrowShuffleMaskElts(NumEltsPerSublane, CrossLaneMaskLarge, CrossLaneMask);
if (!CanUseSublanes) {
// If we're only shuffling a single lowest lane and the rest are identity
// then don't bother.
// TODO - isShuffleMaskInputInPlace could be extended to something like
// this.
int NumIdentityLanes = 0;
bool OnlyShuffleLowestLane = true;
for (int i = 0; i != NumLanes; ++i) {
int LaneOffset = i * NumEltsPerLane;
if (isSequentialOrUndefInRange(InLaneMask, LaneOffset, NumEltsPerLane,
i * NumEltsPerLane))
NumIdentityLanes++;
else if (CrossLaneMask[LaneOffset] != 0)
OnlyShuffleLowestLane = false;
}
if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
return SDValue();
}
// Avoid returning the same shuffle operation. For example,
// t7: v16i16 = vector_shuffle<8,9,10,11,4,5,6,7,0,1,2,3,12,13,14,15> t5,
// undef:v16i16
if (CrossLaneMask == Mask || InLaneMask == Mask)
return SDValue();
SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);
return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT),
InLaneMask);
};
// First attempt a solution with full lanes.
if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes))
return V;
// The rest of the solutions use sublanes.
if (!CanUseSublanes)
return SDValue();
// Then attempt a solution with 64-bit sublanes (vpermq).
if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes * 2))
return V;
// If that doesn't work and we have fast variable cross-lane shuffle,
// attempt 32-bit sublanes (vpermd).
if (!Subtarget.hasFastVariableCrossLaneShuffle())
return SDValue();
return getSublanePermute(/*NumSublanes=*/NumLanes * 4);
}
/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
/// source with a lane permutation.
///
/// This lowering strategy results in four instructions in the worst case for a
/// single-input cross lane shuffle which is lower than any other fully general
/// cross-lane shuffle strategy I'm aware of. Special cases for each particular
/// shuffle pattern should be handled prior to trying this lowering.
static SDValue lowerShuffleAsLanePermuteAndShuffle(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
// FIXME: This should probably be generalized for 512-bit vectors as well.
assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
int Size = Mask.size();
int LaneSize = Size / 2;
// Fold to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
// Only do this if the elements aren't all from the lower lane,
// otherwise we're (probably) better off doing a split.
if (VT == MVT::v4f64 &&
!all_of(Mask, [LaneSize](int M) { return M < LaneSize; }))
return lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG);
// If there are only inputs from one 128-bit lane, splitting will in fact be
// less expensive. The flags track whether the given lane contains an element
// that crosses to another lane.
bool AllLanes;
if (!Subtarget.hasAVX2()) {
bool LaneCrossing[2] = {false, false};
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
AllLanes = LaneCrossing[0] && LaneCrossing[1];
} else {
bool LaneUsed[2] = {false, false};
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
LaneUsed[(Mask[i] % Size) / LaneSize] = true;
AllLanes = LaneUsed[0] && LaneUsed[1];
}
// TODO - we could support shuffling V2 in the Flipped input.
assert(V2.isUndef() &&
"This last part of this routine only works on single input shuffles");
SmallVector<int, 32> InLaneMask(Mask);
for (int i = 0; i < Size; ++i) {
int &M = InLaneMask[i];
if (M < 0)
continue;
if (((M % Size) / LaneSize) != (i / LaneSize))
M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
}
assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&
"In-lane shuffle mask expected");
// If we're not using both lanes in each lane and the inlane mask is not
// repeating, then we're better off splitting.
if (!AllLanes && !is128BitLaneRepeatedShuffleMask(VT, InLaneMask))
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
// Flip the lanes, and shuffle the results which should now be in-lane.
MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
SDValue Flipped = DAG.getBitcast(PVT, V1);
Flipped =
DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1});
Flipped = DAG.getBitcast(VT, Flipped);
return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
}
/// Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (V2.isUndef()) {
// Attempt to match VBROADCAST*128 subvector broadcast load.
bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1);
bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
X86::mayFoldLoad(peekThroughOneUseBitcasts(V1), Subtarget)) {
MVT MemVT = VT.getHalfNumVectorElementsVT();
unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
if (SDValue BcstLd = getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL,
VT, MemVT, Ld, Ofs, DAG))
return BcstLd;
}
// With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
if (Subtarget.hasAVX2())
return SDValue();
}
bool V2IsZero = !V2.isUndef() && ISD::isBuildVectorAllZeros(V2.getNode());
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, Zeroable, V2IsZero, WidenedMask))
return SDValue();
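// A 128-bit half whose elements are all zeroable can be encoded as a zero
// directly in the VPERM2X128 immediate.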
bool IsLowZero = (Zeroable & 0x3) == 0x3;
bool IsHighZero = (Zeroable & 0xc) == 0xc;
// Try to use an insert into a zero vector.
if (WidenedMask[0] == 0 && IsHighZero) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
getZeroVector(VT, Subtarget, DAG, DL), LoV,
DAG.getIntPtrConstant(0, DL));
}
// TODO: If minimizing size and one of the inputs is a zero vector and the
// zero vector has only one use, we could use a VPERM2X128 to save the
// instruction bytes needed to explicitly generate the zero vector.
// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return Blend;
// If either input operand is a zero vector, use VPERM2X128 because its mask
// allows us to replace the zero input with an implicit zero.
if (!IsLowZero && !IsHighZero) {
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2);
if (OnlyUsesV1 || isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2)) {
// With AVX1, use vperm2f128 (below) to allow load folding. Otherwise,
// this will likely become vinsertf128 which can't fold a 256-bit memop.
if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,
DAG.getIntPtrConstant(2, DL));
}
}
// Try to use SHUF128 if possible.
if (Subtarget.hasVLX()) {
if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
((WidenedMask[1] % 2) << 1);
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
}
}
// Otherwise form a 128-bit permutation. After accounting for undefs,
// convert the 64-bit shuffle mask selection values into 128-bit
// selection bits by dividing the indexes by 2 and shifting into positions
// defined by a vperm2*128 instruction's immediate control byte.
// The immediate permute control byte looks like this:
// [1:0] - select 128 bits from sources for low half of destination
// [2] - ignore
// [3] - zero low half of destination
// [5:4] - select 128 bits from sources for high half of destination
// [6] - ignore
// [7] - zero high half of destination
assert((WidenedMask[0] >= 0 || IsLowZero) &&
(WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");
unsigned PermMask = 0;
PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4);
// Check the immediate mask and replace unused sources with undef.
if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00)
V1 = DAG.getUNDEF(VT);
if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20)
V2 = DAG.getUNDEF(VT);
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
/// Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
/// This attempts to create a repeated lane shuffle where each lane uses one
/// or two of the lanes of the inputs. The lanes of the input vectors are
/// shuffled in one or two independent shuffles to get the lanes into the
/// position needed by the final shuffle.
static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!V2.isUndef() && "This is only useful with multiple inputs.");
if (is128BitLaneRepeatedShuffleMask(VT, Mask))
return SDValue();
int NumElts = Mask.size();
int NumLanes = VT.getSizeInBits() / 128;
int NumLaneElts = 128 / VT.getScalarSizeInBits();
SmallVector<int, 16> RepeatMask(NumLaneElts, -1);
SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}});
// First pass will try to fill in the RepeatMask from lanes that need two
// sources.
for (int Lane = 0; Lane != NumLanes; ++Lane) {
int Srcs[2] = {-1, -1};
SmallVector<int, 16> InLaneMask(NumLaneElts, -1);
for (int i = 0; i != NumLaneElts; ++i) {
int M = Mask[(Lane * NumLaneElts) + i];
if (M < 0)
continue;
// Determine which of the possible input lanes (NumLanes from each source)
// this element comes from. Assign that as one of the sources for this
// lane. We can assign up to 2 sources for this lane. If we run out of
// sources we can't do anything.
int LaneSrc = M / NumLaneElts;
int Src;
if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
Src = 0;
else if (Srcs[1] < 0 || Srcs[1] == LaneSrc)
Src = 1;
else
return SDValue();
Srcs[Src] = LaneSrc;
InLaneMask[i] = (M % NumLaneElts) + Src * NumElts;
}
// If this lane has two sources, see if it fits with the repeat mask so far.
if (Srcs[1] < 0)
continue;
LaneSrcs[Lane][0] = Srcs[0];
LaneSrcs[Lane][1] = Srcs[1];
auto MatchMasks = [](ArrayRef<int> M1, ArrayRef<int> M2) {
assert(M1.size() == M2.size() && "Unexpected mask size");
for (int i = 0, e = M1.size(); i != e; ++i)
if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i])
return false;
return true;
};
auto MergeMasks = [](ArrayRef<int> Mask, MutableArrayRef<int> MergedMask) {
assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
for (int i = 0, e = MergedMask.size(); i != e; ++i) {
int M = Mask[i];
if (M < 0)
continue;
assert((MergedMask[i] < 0 || MergedMask[i] == M) &&
"Unexpected mask element");
MergedMask[i] = M;
}
};
if (MatchMasks(InLaneMask, RepeatMask)) {
// Merge this lane mask into the final repeat mask.
MergeMasks(InLaneMask, RepeatMask);
continue;
}
// Didn't find a match. Swap the operands and try again.
std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]);
ShuffleVectorSDNode::commuteMask(InLaneMask);
if (MatchMasks(InLaneMask, RepeatMask)) {
// Merge this lane mask into the final repeat mask.
MergeMasks(InLaneMask, RepeatMask);
continue;
}
// Couldn't find a match with the operands in either order.
return SDValue();
}
// Now handle any lanes with only one source.
for (int Lane = 0; Lane != NumLanes; ++Lane) {
// If this lane has already been processed, skip it.
if (LaneSrcs[Lane][0] >= 0)
continue;
for (int i = 0; i != NumLaneElts; ++i) {
int M = Mask[(Lane * NumLaneElts) + i];
if (M < 0)
continue;
// If RepeatMask isn't defined yet, we can define it ourselves.
if (RepeatMask[i] < 0)
RepeatMask[i] = M % NumLaneElts;
if (RepeatMask[i] < NumElts) {
if (RepeatMask[i] != M % NumLaneElts)
return SDValue();
LaneSrcs[Lane][0] = M / NumLaneElts;
} else {
if (RepeatMask[i] != ((M % NumLaneElts) + NumElts))
return SDValue();
LaneSrcs[Lane][1] = M / NumLaneElts;
}
}
if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0)
return SDValue();
}
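// Build the shuffle for the first operand: every lane takes all of its
// elements from the lane LaneSrcs[Lane][0].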
SmallVector<int, 16> NewMask(NumElts, -1);
for (int Lane = 0; Lane != NumLanes; ++Lane) {
int Src = LaneSrcs[Lane][0];
for (int i = 0; i != NumLaneElts; ++i) {
int M = -1;
if (Src >= 0)
M = Src * NumLaneElts + i;
NewMask[Lane * NumLaneElts + i] = M;
}
}
SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
// Ensure we didn't get back the shuffle we started with.
// FIXME: This is a hack to make up for some splat handling code in
// getVectorShuffle.
if (isa<ShuffleVectorSDNode>(NewV1) &&
cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
return SDValue();
for (int Lane = 0; Lane != NumLanes; ++Lane) {
int Src = LaneSrcs[Lane][1];
for (int i = 0; i != NumLaneElts; ++i) {
int M = -1;
if (Src >= 0)
M = Src * NumLaneElts + i;
NewMask[Lane * NumLaneElts + i] = M;
}
}
SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
// Ensure we didn't get back the shuffle we started with.
// FIXME: This is a hack to make up for some splat handling code in
// getVectorShuffle.
if (isa<ShuffleVectorSDNode>(NewV2) &&
cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
return SDValue();
for (int i = 0; i != NumElts; ++i) {
NewMask[i] = RepeatMask[i % NumLaneElts];
if (NewMask[i] < 0)
continue;
NewMask[i] += (i / NumLaneElts) * NumLaneElts;
}
return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
}
/// If the input shuffle mask results in a vector that is undefined in all upper
/// or lower half elements and that mask accesses only 2 halves of the
/// shuffle's operands, return true. A mask of half the width with mask indexes
/// adjusted to access the extracted halves of the original shuffle operands is
/// returned in HalfMask. HalfIdx1 and HalfIdx2 return whether the upper or
/// lower half of each input operand is accessed.
static bool
getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
int &HalfIdx1, int &HalfIdx2) {
assert((Mask.size() == HalfMask.size() * 2) &&
"Expected input mask to be twice as long as output");
// Exactly one half of the result must be undef to allow narrowing.
bool UndefLower = isUndefLowerHalf(Mask);
bool UndefUpper = isUndefUpperHalf(Mask);
if (UndefLower == UndefUpper)
return false;
unsigned HalfNumElts = HalfMask.size();
unsigned MaskIndexOffset = UndefLower ? HalfNumElts : 0;
HalfIdx1 = -1;
HalfIdx2 = -1;
for (unsigned i = 0; i != HalfNumElts; ++i) {
int M = Mask[i + MaskIndexOffset];
if (M < 0) {
HalfMask[i] = M;
continue;
}
// Determine which of the 4 half vectors this element is from.
// i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
int HalfIdx = M / HalfNumElts;
// Determine the element index into its half vector source.
int HalfElt = M % HalfNumElts;
// We can shuffle with up to 2 half vectors, set the new 'half'
// shuffle mask accordingly.
if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) {
HalfMask[i] = HalfElt;
HalfIdx1 = HalfIdx;
continue;
}
if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) {
HalfMask[i] = HalfElt + HalfNumElts;
HalfIdx2 = HalfIdx;
continue;
}
// Too many half vectors referenced.
return false;
}
return true;
}
/// Given the output values from getHalfShuffleMask(), create a half width
/// shuffle of extracted vectors followed by an insert back to full width.
static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
ArrayRef<int> HalfMask, int HalfIdx1,
int HalfIdx2, bool UndefLower,
SelectionDAG &DAG, bool UseConcat = false) {
assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
assert(V1.getValueType().isSimple() && "Expecting only simple types");
MVT VT = V1.getSimpleValueType();
MVT HalfVT = VT.getHalfNumVectorElementsVT();
unsigned HalfNumElts = HalfVT.getVectorNumElements();
auto getHalfVector = [&](int HalfIdx) {
if (HalfIdx < 0)
return DAG.getUNDEF(HalfVT);
SDValue V = (HalfIdx < 2 ? V1 : V2);
HalfIdx = (HalfIdx % 2) * HalfNumElts;
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
DAG.getIntPtrConstant(HalfIdx, DL));
};
// ins undef, (shuf (ext V1, HalfIdx1), (ext V2, HalfIdx2), HalfMask), Offset
SDValue Half1 = getHalfVector(HalfIdx1);
SDValue Half2 = getHalfVector(HalfIdx2);
SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
if (UseConcat) {
SDValue Op0 = V;
SDValue Op1 = DAG.getUNDEF(HalfVT);
if (UndefLower)
std::swap(Op0, Op1);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1);
}
unsigned Offset = UndefLower ? HalfNumElts : 0;
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
DAG.getIntPtrConstant(Offset, DL));
}
/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
/// This allows for fast cases such as subvector extraction/insertion
/// or shuffling smaller vector types which can lower more efficiently.
static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT.is256BitVector() || VT.is512BitVector()) &&
"Expected 256-bit or 512-bit vector");
bool UndefLower = isUndefLowerHalf(Mask);
if (!UndefLower && !isUndefUpperHalf(Mask))
return SDValue();
assert((!UndefLower || !isUndefUpperHalf(Mask)) &&
"Completely undef shuffle mask should have been simplified already");
// Upper half is undef and lower half is whole upper subvector.
// e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
MVT HalfVT = VT.getHalfNumVectorElementsVT();
unsigned HalfNumElts = HalfVT.getVectorNumElements();
if (!UndefLower &&
isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
DAG.getIntPtrConstant(HalfNumElts, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
DAG.getIntPtrConstant(0, DL));
}
// Lower half is undef and upper half is whole lower subvector.
// e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
if (UndefLower &&
isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
DAG.getIntPtrConstant(HalfNumElts, DL));
}
int HalfIdx1, HalfIdx2;
SmallVector<int, 8> HalfMask(HalfNumElts);
if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2))
return SDValue();
assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
// Only shuffle the halves of the inputs when useful.
unsigned NumLowerHalves =
(HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
unsigned NumUpperHalves =
(HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
assert(NumLowerHalves + NumUpperHalves <= 2 && "Only 1 or 2 halves allowed");
// Determine the larger pattern of undef/halves, then decide if it's worth
// splitting the shuffle based on subtarget capabilities and types.
unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
if (!UndefLower) {
// XXXXuuuu: no insert is needed.
// Always extract the lower halves when only the lower result half is set -
// these are all free subreg ops.
if (NumUpperHalves == 0)
return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
UndefLower, DAG);
if (NumUpperHalves == 1) {
// AVX2 has efficient 32/64-bit element cross-lane shuffles.
if (Subtarget.hasAVX2()) {
// extract128 + vunpckhps/vshufps is better than vblend + vpermps.
if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
!is128BitUnpackShuffleMask(HalfMask, DAG) &&
(!isSingleSHUFPSMask(HalfMask) ||
Subtarget.hasFastVariableCrossLaneShuffle()))
return SDValue();
// If this is a unary shuffle (assume that the 2nd operand is
// canonicalized to undef), then we can use vpermpd. Otherwise, we
// are better off extracting the upper half of 1 operand and using a
// narrow shuffle.
if (EltWidth == 64 && V2.isUndef())
return SDValue();
}
// AVX512 has efficient cross-lane shuffles for all legal 512-bit types.
if (Subtarget.hasAVX512() && VT.is512BitVector())
return SDValue();
// Extract + narrow shuffle is better than the wide alternative.
return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
UndefLower, DAG);
}
// Don't extract both uppers, instead shuffle and then extract.
assert(NumUpperHalves == 2 && "Half vector count went wrong");
return SDValue();
}
// UndefLower - uuuuXXXX: an insert to high half is required if we split this.
if (NumUpperHalves == 0) {
// AVX2 has efficient 64-bit element cross-lane shuffles.
// TODO: Refine to account for unary shuffle, splat, and other masks?
if (Subtarget.hasAVX2() && EltWidth == 64)
return SDValue();
// AVX512 has efficient cross-lane shuffles for all legal 512-bit types.
if (Subtarget.hasAVX512() && VT.is512BitVector())
return SDValue();
// Narrow shuffle + insert is better than the wide alternative.
return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
UndefLower, DAG);
}
// NumUpperHalves != 0: don't bother with extract, shuffle, and then insert.
return SDValue();
}
/// Handle case where shuffle sources are coming from the same 128-bit lane and
/// every lane can be represented as the same repeating mask - allowing us to
/// shuffle the sources with the repeating shuffle and then permute the result
/// to the destination lanes.
static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits() / 128;
int NumLaneElts = NumElts / NumLanes;
// On AVX2 we may be able to just shuffle the lowest elements and then
// broadcast the result.
if (Subtarget.hasAVX2()) {
for (unsigned BroadcastSize : {16, 32, 64}) {
if (BroadcastSize <= VT.getScalarSizeInBits())
continue;
int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits();
// Attempt to match a repeating pattern every NumBroadcastElts that,
// accounting for UNDEFs, only references the lowest 128-bit
// lane of the inputs.
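// e.g. for v8i32 a 64-bit broadcast repeats a pattern of
// NumBroadcastElts == 2 elements drawn from the lowest lane.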
auto FindRepeatingBroadcastMask = [&](SmallVectorImpl<int> &RepeatMask) {
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j) {
int M = Mask[i + j];
if (M < 0)
continue;
int &R = RepeatMask[j];
if (0 != ((M % NumElts) / NumLaneElts))
return false;
if (0 <= R && R != M)
return false;
R = M;
}
return true;
};
SmallVector<int, 8> RepeatMask((unsigned)NumElts, -1);
if (!FindRepeatingBroadcastMask(RepeatMask))
continue;
// Shuffle the (lowest) repeated elements in place for broadcast.
SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask);
// Shuffle the actual broadcast.
SmallVector<int, 8> BroadcastMask((unsigned)NumElts, -1);
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j)
BroadcastMask[i + j] = j;
return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),
BroadcastMask);
}
}
// Bail if the shuffle mask doesn't cross 128-bit lanes.
if (!is128BitLaneCrossingShuffleMask(VT, Mask))
return SDValue();
// Bail if we already have a repeated lane shuffle mask.
if (is128BitLaneRepeatedShuffleMask(VT, Mask))
return SDValue();
// Helper to look for a repeated mask in each split sub-lane, checking that
// those sub-lanes can then be permuted into place.
auto ShuffleSubLanes = [&](int SubLaneScale) {
int NumSubLanes = NumLanes * SubLaneScale;
int NumSubLaneElts = NumLaneElts / SubLaneScale;
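// e.g. for v8f32 with SubLaneScale == 2, NumSubLanes == 4 and
// NumSubLaneElts == 2.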
// Check that all the sources are coming from the same lane and see if we
// can form a repeating shuffle mask (local to each sub-lane). At the same
// time, determine the source sub-lane for each destination sub-lane.
int TopSrcSubLane = -1;
SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);
SmallVector<SmallVector<int, 8>> RepeatedSubLaneMasks(
SubLaneScale,
SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef));
for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
// Extract the sub-lane mask, check that it all comes from the same lane
// and normalize the mask entries to come from the first lane.
int SrcLane = -1;
SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);
for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
if (M < 0)
continue;
int Lane = (M % NumElts) / NumLaneElts;
if ((0 <= SrcLane) && (SrcLane != Lane))
return SDValue();
SrcLane = Lane;
int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
SubLaneMask[Elt] = LocalM;
}
// Whole sub-lane is UNDEF.
if (SrcLane < 0)
continue;
// Attempt to match against the candidate repeated sub-lane masks.
for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) {
for (int i = 0; i != NumSubLaneElts; ++i) {
if (M1[i] < 0 || M2[i] < 0)
continue;
if (M1[i] != M2[i])
return false;
}
return true;
};
auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
continue;
// Merge the sub-lane mask into the matching repeated sub-lane mask.
for (int i = 0; i != NumSubLaneElts; ++i) {
int M = SubLaneMask[i];
if (M < 0)
continue;
assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
"Unexpected mask element");
RepeatedSubLaneMask[i] = M;
}
// Track the top most source sub-lane - by setting the remaining to
// UNDEF we can greatly simplify shuffle matching.
int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
break;
}
// Bail if we failed to find a matching repeated sub-lane mask.
if (Dst2SrcSubLanes[DstSubLane] < 0)
return SDValue();
}
assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
"Unexpected source lane");
// Create a repeating shuffle mask for the entire vector.
SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);
for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
int Lane = SubLane / SubLaneScale;
auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
int M = RepeatedSubLaneMask[Elt];
if (M < 0)
continue;
int Idx = (SubLane * NumSubLaneElts) + Elt;
RepeatedMask[Idx] = M + (Lane * NumLaneElts);
}
}
// Shuffle each source sub-lane to its destination.
SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);
for (int i = 0; i != NumElts; i += NumSubLaneElts) {
int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
if (SrcSubLane < 0)
continue;
for (int j = 0; j != NumSubLaneElts; ++j)
SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
}
// Avoid returning the same shuffle operation.
// v8i32 = vector_shuffle<0,1,4,5,2,3,6,7> t5, undef:v8i32
if (RepeatedMask == Mask || SubLaneMask == Mask)
return SDValue();
SDValue RepeatedShuffle =
DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);
return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),
SubLaneMask);
};
// On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
// (with PERMQ/PERMPD). On AVX2/AVX512BW targets, permuting 32-bit sub-lanes,
// even with a variable shuffle, can be worth it for v32i8/v64i8 vectors.
// Otherwise we can only permute whole 128-bit lanes.
int MinSubLaneScale = 1, MaxSubLaneScale = 1;
if (Subtarget.hasAVX2() && VT.is256BitVector()) {
bool OnlyLowestElts = isUndefOrInRange(Mask, 0, NumLaneElts);
MinSubLaneScale = 2;
MaxSubLaneScale =
(!OnlyLowestElts && V2.isUndef() && VT == MVT::v32i8) ? 4 : 2;
}
if (Subtarget.hasBWI() && VT == MVT::v64i8)
MinSubLaneScale = MaxSubLaneScale = 4;
for (int Scale = MinSubLaneScale; Scale <= MaxSubLaneScale; Scale *= 2)
if (SDValue Shuffle = ShuffleSubLanes(Scale))
return Shuffle;
return SDValue();
}
static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
bool &ForceV1Zero, bool &ForceV2Zero,
unsigned &ShuffleImm, ArrayRef<int> Mask,
const APInt &Zeroable) {
int NumElts = VT.getVectorNumElements();
assert(VT.getScalarSizeInBits() == 64 &&
(NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected data type for VSHUFPD");
assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) &&
"Illegal shuffle mask");
bool ZeroLane[2] = { true, true };
for (int i = 0; i < NumElts; ++i)
ZeroLane[i & 1] &= Zeroable[i];
// Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
// Mask for V4F64: 0/1, 4/5, 2/3, 6/7, ..
ShuffleImm = 0;
bool ShufpdMask = true;
bool CommutableMask = true;
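// For each element, check that it selects from the pair SHUFPD expects (even
// elements from V1, odd elements from V2, within each 128-bit lane), or from
// the commuted pair with V1 and V2 swapped. ShuffleImm collects, per result
// element, which element of the chosen pair was selected.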
for (int i = 0; i < NumElts; ++i) {
if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1])
continue;
if (Mask[i] < 0)
return false;
int Val = (i & 6) + NumElts * (i & 1);
int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
if (Mask[i] < Val || Mask[i] > Val + 1)
ShufpdMask = false;
if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
CommutableMask = false;
ShuffleImm |= (Mask[i] % 2) << i;
}
if (!ShufpdMask && !CommutableMask)
return false;
if (!ShufpdMask && CommutableMask)
std::swap(V1, V2);
ForceV1Zero = ZeroLane[0];
ForceV2Zero = ZeroLane[1];
return true;
}
static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
"Unexpected data type for VSHUFPD");
unsigned Immediate = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate,
Mask, Zeroable))
return SDValue();
// Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
if (ForceV1Zero)
V1 = getZeroVector(VT, Subtarget, DAG, DL);
if (ForceV2Zero)
V2 = getZeroVector(VT, Subtarget, DAG, DL);
return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
DAG.getTargetConstant(Immediate, DL, MVT::i8));
}
// Look for {0, 8, 16, 24, 32, 40, 48, 56} in the first 8 elements, followed
// by zeroable elements in the remaining 24 elements. Turn this into two
// vmovqb instructions shuffled together.
static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const APInt &Zeroable,
SelectionDAG &DAG) {
assert(VT == MVT::v32i8 && "Unexpected type!");
// The first 8 indices should be every 8th element.
if (!isSequentialOrUndefInRange(Mask, 0, 8, 0, 8))
return SDValue();
// Remaining elements need to be zeroable.
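// i.e. Zeroable must have at least 24 leading ones, covering elements 8..31.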
if (Zeroable.countLeadingOnes() < (Mask.size() - 8))
return SDValue();
V1 = DAG.getBitcast(MVT::v4i64, V1);
V2 = DAG.getBitcast(MVT::v4i64, V2);
V1 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V1);
V2 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V2);
// The VTRUNCs will put 0s in the upper 12 bytes. Use them to put zeroes in
// the upper bits of the result using an unpckldq.
SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2,
{ 0, 1, 2, 3, 16, 17, 18, 19,
4, 5, 6, 7, 20, 21, 22, 23 });
// Insert the unpckldq into a zero vector to widen to v32i8.
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8,
DAG.getConstant(0, DL, MVT::v32i8), Unpack,
DAG.getIntPtrConstant(0, DL));
}
// a = shuffle v1, v2, mask1 ; interleaving lower lanes of v1 and v2
// b = shuffle v1, v2, mask2 ; interleaving higher lanes of v1 and v2
// =>
// ul = unpckl v1, v2
// uh = unpckh v1, v2
// a = vperm ul, uh
// b = vperm ul, uh
//
// Pattern-match interleave(256b v1, 256b v2) -> 512b v3 and lower it into unpck
// and permute. We cannot directly match v3 because it is split into two
// 256-bit vectors in earlier isel stages. Therefore, this function matches a
// pair of 256-bit shuffles and makes sure the masks are consecutive.
//
// Once unpck and permute nodes are created, the permute corresponding to this
// shuffle is returned, while the other permute replaces the other half of the
// shuffle in the selection dag.
static SDValue lowerShufflePairAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
if (VT != MVT::v8f32 && VT != MVT::v8i32 && VT != MVT::v16i16 &&
VT != MVT::v32i8)
return SDValue();
// <B0, B1, B0+1, B1+1, ..., >
auto IsInterleavingPattern = [&](ArrayRef<int> Mask, unsigned Begin0,
unsigned Begin1) {
size_t Size = Mask.size();
assert(Size % 2 == 0 && "Expected even mask size");
for (unsigned I = 0; I < Size; I += 2) {
if (Mask[I] != (int)(Begin0 + I / 2) ||
Mask[I + 1] != (int)(Begin1 + I / 2))
return false;
}
return true;
};
// Check which half of the interleave this shuffle node represents.
int NumElts = VT.getVectorNumElements();
size_t FirstQtr = NumElts / 2;
size_t ThirdQtr = NumElts + NumElts / 2;
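// e.g. for v8i32 the first-half interleave mask is <0, 8, 1, 9, 2, 10, 3, 11>
// and the second-half mask is <4, 12, 5, 13, 6, 14, 7, 15>.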
bool IsFirstHalf = IsInterleavingPattern(Mask, 0, NumElts);
bool IsSecondHalf = IsInterleavingPattern(Mask, FirstQtr, ThirdQtr);
if (!IsFirstHalf && !IsSecondHalf)
return SDValue();
// Find the intersection between shuffle users of V1 and V2.
SmallVector<SDNode *, 2> Shuffles;
for (SDNode *User : V1->uses())
if (User->getOpcode() == ISD::VECTOR_SHUFFLE && User->getOperand(0) == V1 &&
User->getOperand(1) == V2)
Shuffles.push_back(User);
// Limit user size to two for now.
if (Shuffles.size() != 2)
return SDValue();
// Find out which half of the 512-bit interleave each smaller shuffle is.
auto *SVN1 = cast<ShuffleVectorSDNode>(Shuffles[0]);
auto *SVN2 = cast<ShuffleVectorSDNode>(Shuffles[1]);
SDNode *FirstHalf;
SDNode *SecondHalf;
if (IsInterleavingPattern(SVN1->getMask(), 0, NumElts) &&
IsInterleavingPattern(SVN2->getMask(), FirstQtr, ThirdQtr)) {
FirstHalf = Shuffles[0];
SecondHalf = Shuffles[1];
} else if (IsInterleavingPattern(SVN1->getMask(), FirstQtr, ThirdQtr) &&
IsInterleavingPattern(SVN2->getMask(), 0, NumElts)) {
FirstHalf = Shuffles[1];
SecondHalf = Shuffles[0];
} else {
return SDValue();
}
// Lower into unpck and perm. Return the perm of this shuffle and replace
// the other.
SDValue Unpckl = DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
SDValue Unpckh = DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
SDValue Perm1 = DAG.getNode(X86ISD::VPERM2X128, DL, VT, Unpckl, Unpckh,
DAG.getTargetConstant(0x20, DL, MVT::i8));
SDValue Perm2 = DAG.getNode(X86ISD::VPERM2X128, DL, VT, Unpckl, Unpckh,
DAG.getTargetConstant(0x31, DL, MVT::i8));
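// Imm 0x20 concatenates the low 128-bit halves of Unpckl and Unpckh, while
// 0x31 concatenates their high 128-bit halves.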
if (IsFirstHalf) {
DAG.ReplaceAllUsesWith(SecondHalf, &Perm2);
return Perm1;
}
DAG.ReplaceAllUsesWith(FirstHalf, &Perm1);
return Perm2;
}
/// Handle lowering of 4-lane 64-bit floating point shuffles.
///
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f64, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// Use low duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
// Non-half-crossing single input shuffles can be lowered with an
// interleaved permutation.
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
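// e.g. the in-lane swap mask <1, 0, 3, 2> yields VPERMILPMask == 0b0101.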
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
}
// With AVX2 we have direct support for this permutation.
if (Subtarget.hasAVX2())
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
// Try to permute the lanes and then use a per-lane permute.
if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v4f64, V1, V2,
Mask, DAG, Subtarget))
return V;
// Otherwise, fall back.
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask,
DAG, Subtarget);
}
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
return V;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check if the blend happens to exactly fit that of SHUFPD.
if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Op;
bool V1IsInPlace = isShuffleMaskInputInPlace(0, Mask);
bool V2IsInPlace = isShuffleMaskInputInPlace(1, Mask);
// If we have lane crossing shuffles AND they don't all come from the lower
// lane elements, lower to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
// TODO: Handle BUILD_VECTOR sources which getVectorShuffle currently
// canonicalize to a blend of splat which isn't necessary for this combine.
if (is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask) &&
!all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) &&
(V1.getOpcode() != ISD::BUILD_VECTOR) &&
(V2.getOpcode() != ISD::BUILD_VECTOR))
return lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2, Mask, DAG);
// If we have one input in place, then we can permute the other input and
// blend the result.
if (V1IsInPlace || V2IsInPlace)
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either input is already in place,
// we will be able to shuffle the other input across lanes in a single
// instruction, so skip this pattern.
if (!(Subtarget.hasAVX2() && (V1IsInPlace || V2IsInPlace)))
if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2,
DAG, Subtarget))
return V;
// If we have AVX2 then we always want to lower with a blend because at v4 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG);
// Otherwise fall back on generic lowering.
return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG);
}
/// Handle lowering of 4-lane 64-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v4i64 shuffling.
static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG))
return Broadcast;
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower latency instructions that will operate on both lanes.
SmallVector<int, 2> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
SmallVector<int, 4> PSHUFDMask;
narrowShuffleMaskElts(2, RepeatedMask, PSHUFDMask);
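// Each 64-bit mask element is expanded to a pair of 32-bit elements, e.g. a
// repeated mask of <1, 0> becomes the PSHUFD mask <2, 3, 0, 1>.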
return DAG.getBitcast(
MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
DAG.getBitcast(MVT::v8i32, V1),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
// AVX2 provides a direct instruction for permuting a single input across
// lanes.
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// If we have VLX support, we can use VALIGN or VEXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2,
DAG, Subtarget))
return V;
}
// Try to use PALIGNR.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
return V;
bool V1IsInPlace = isShuffleMaskInputInPlace(0, Mask);
bool V2IsInPlace = isShuffleMaskInputInPlace(1, Mask);
// If we have one input in place, then we can permute the other input and
// blend the result.
if (V1IsInPlace || V2IsInPlace)
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return V;
// Try to lower to PERMQ(BLENDD(V1,V2)).
if (SDValue V =
lowerShuffleAsBlendAndPermute(DL, MVT::v4i64, V1, V2, Mask, DAG))
return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either input is already in place,
// we will be able to shuffle the other input across lanes in a single
// instruction, so skip this pattern.
if (!V1IsInPlace && !V2IsInPlace)
if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic blend lowering.
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit floating point shuffles.
///
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG))
return Broadcast;
// If the shuffle mask is repeated in each 128-bit lane, we have many more
// options to efficiently lower the shuffle.
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 &&
"Repeated masks must be half the mask width!");
// Use even/odd duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
return V;
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
// have already handled any direct blends.
return lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
}
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return V;
// If we have a single-input shuffle with different shuffle patterns in the
// two 128-bit lanes, use a variable-mask VPERMILPS.
if (V2.isUndef()) {
if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) {
SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask);
}
if (Subtarget.hasAVX2()) {
SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
}
// Otherwise, fall back.
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask,
DAG, Subtarget);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return Result;
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2,
DAG, Subtarget))
return V;
// Try to match an interleave of two v8f32s and lower them as unpck and
// permutes using ymms. This needs to go before we try to split the vectors.
//
// TODO: Expand this to AVX1. Currently v8i32 is casted to v8f32 and hits
// this path inadvertently.
if (Subtarget.hasAVX2() && !Subtarget.hasAVX512())
if (SDValue V = lowerShufflePairAsUNPCKAndPermute(DL, MVT::v8f32, V1, V2,
Mask, DAG))
return V;
// For non-AVX512 targets, if the mask matches an in-lane 16-bit unpack
// pattern, prefer splitting, since the split form uses vpunpcklwd and
// vpunpckhwd and is more efficient than a blend.
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32, DAG))
return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget,
DAG);
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG);
// Otherwise fall back on generic lowering.
return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v8i32 shuffling.
static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return ZExt;
// Try to match an interleave of two v8i32s and lower them as unpck and
// permutes using ymms. This needs to go before we try to split the vectors.
if (!Subtarget.hasAVX512())
if (SDValue V = lowerShufflePairAsUNPCKAndPermute(DL, MVT::v8i32, V1, V2,
Mask, DAG))
return V;
// For non-AVX512 targets, if the mask matches an in-lane 16-bit unpack
// pattern, prefer splitting, since the split form uses vpunpcklwd and
// vpunpckhwd and is more efficient than a blend.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32, DAG) && !V2.isUndef() &&
!Subtarget.hasAVX512())
return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget,
DAG);
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask,
Subtarget, DAG))
return Broadcast;
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the two 128-bit
// lanes.
SmallVector<int, 4> RepeatedMask;
bool Is128BitLaneRepeatedShuffle =
is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask);
if (Is128BitLaneRepeatedShuffle) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
return V;
}
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// If we have VLX support, we can use VALIGN or EXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2,
DAG, Subtarget))
return V;
}
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return V;
if (V2.isUndef()) {
// Try to produce a fixed cross-128-bit lane permute followed by unpack
// because that should be faster than the variable permute alternatives.
if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v8i32, Mask, V1, V2, DAG))
return V;
// If the shuffle patterns aren't repeated but it's a single input, directly
// generate a cross-lane VPERMD instruction.
SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1);
}
// Assume that a single SHUFPS is faster than an alternative sequence of
// multiple instructions (even if the CPU has a domain penalty).
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v8i32, ShufPS);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic blend lowering.
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i32, V1, V2, Mask,
Subtarget, DAG);
}
/// Handle lowering of 16-lane 16-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v16i16 shuffling.
static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask,
Subtarget, DAG))
return Broadcast;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG,
Subtarget))
return V;
// Try to lower using a truncation.
if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i16, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i16, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return V;
if (V2.isUndef()) {
// Try to use bit rotation instructions.
if (SDValue Rotate =
lowerShuffleAsBitRotate(DL, MVT::v16i16, V1, Mask, Subtarget, DAG))
return Rotate;
// Try to produce a fixed cross-128-bit lane permute followed by unpack
// because that should be faster than the variable permute alternatives.
if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v16i16, Mask, V1, V2, DAG))
return V;
// There are no generalized cross-lane shuffle operations available on i16
// element types.
if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
return V;
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask,
DAG, Subtarget);
}
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v16 case.
return lowerV8I16GeneralSingleInputShuffle(
DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
}
}
if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, V2,
Zeroable, Subtarget, DAG))
return PSHUFB;
// AVX512BW can lower to VPERMW (non-VLX will pad to v32i16).
if (Subtarget.hasBWI())
return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, Subtarget, DAG);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Result;
// Try to permute the lanes and then use a per-lane permute.
if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
return V;
// Try to match an interleave of two v16i16s and lower them as unpck and
// permutes using ymms.
if (!Subtarget.hasAVX512())
if (SDValue V = lowerShufflePairAsUNPCKAndPermute(DL, MVT::v16i16, V1, V2,
Mask, DAG))
return V;
// Otherwise fall back on generic lowering.
return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
Subtarget, DAG);
}
/// Handle lowering of 32-lane 8-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v32i8 shuffling.
static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask,
Subtarget, DAG))
return Broadcast;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG,
Subtarget))
return V;
// Try to lower using a truncation.
if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v32i8, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i8, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Try to use bit rotation instructions.
if (V2.isUndef())
if (SDValue Rotate =
lowerShuffleAsBitRotate(DL, MVT::v32i8, V1, Mask, Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return V;
// There are no generalized cross-lane shuffle operations available on i8
// element types.
if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
// Try to produce a fixed cross-128-bit lane permute followed by unpack
// because that should be faster than the variable permute alternatives.
if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v32i8, Mask, V1, V2, DAG))
return V;
if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
return V;
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask,
DAG, Subtarget);
}
if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
Zeroable, Subtarget, DAG))
return PSHUFB;
// AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8).
if (Subtarget.hasVBMI())
return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, Subtarget, DAG);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Result;
// Try to permute the lanes and then use a per-lane permute.
if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
return V;
// Look for {0, 8, 16, 24, 32, 40, 48, 56} in the first 8 elements, followed
// by zeroable elements in the remaining 24 elements. Turn this into two
// vmovqb instructions shuffled together.
if (Subtarget.hasVLX())
if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2,
Mask, Zeroable, DAG))
return V;
// Try to match an interleave of two v32i8s and lower them as unpck and
// permutes using ymms.
if (!Subtarget.hasAVX512())
if (SDValue V = lowerShufflePairAsUNPCKAndPermute(DL, MVT::v32i8, V1, V2,
Mask, DAG))
return V;
// Otherwise fall back on generic lowering.
return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
Subtarget, DAG);
}
/// High-level routine to lower various 256-bit x86 vector shuffles.
///
/// This routine either breaks down the specific type of a 256-bit x86 vector
/// shuffle or splits it into two 128-bit shuffles and fuses the results back
/// together based on the available instructions.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
SDValue V1, SDValue V2, const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
int NumElts = VT.getVectorNumElements();
int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
if (NumV2Elements == 1 && Mask[0] >= NumElts)
if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Handle special cases where the lower or upper half is UNDEF.
if (SDValue V =
lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
// There is a really nice hard cut-over between AVX1 and AVX2 that means we
// can check for those subtargets here and avoid much of the subtarget
// querying in the per-vector-type lowering routines. With AVX1 we have
// essentially *zero* ability to manipulate a 256-bit vector with integer
// types. Since we'll use floating point types there eventually, just
// immediately cast everything to a float and operate entirely in that domain.
if (VT.isInteger() && !Subtarget.hasAVX2()) {
int ElementBits = VT.getScalarSizeInBits();
if (ElementBits < 32) {
// No floating point type available, if we can't use the bit operations
// for masking/blending then decompose into 128-bit vectors.
if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
}
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
VT.getVectorNumElements());
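// e.g. v8i32 is shuffled as v8f32 and v4i64 as v4f64.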
V1 = DAG.getBitcast(FpVT, V1);
V2 = DAG.getBitcast(FpVT, V2);
return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
}
- if (VT == MVT::v16f16) {
- V1 = DAG.getBitcast(MVT::v16i16, V1);
- V2 = DAG.getBitcast(MVT::v16i16, V2);
- return DAG.getBitcast(MVT::v16f16,
- DAG.getVectorShuffle(MVT::v16i16, DL, V1, V2, Mask));
+ if (VT == MVT::v16f16 || VT.getVectorElementType() == MVT::bf16) {
+ MVT IVT = VT.changeVectorElementTypeToInteger();
+ V1 = DAG.getBitcast(IVT, V1);
+ V2 = DAG.getBitcast(IVT, V2);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(IVT, DL, V1, V2, Mask));
}
switch (VT.SimpleTy) {
case MVT::v4f64:
return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i64:
return lowerV4I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8f32:
return lowerV8F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i32:
return lowerV8I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i16:
return lowerV16I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i8:
return lowerV32I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 256-bit x86 vector type!");
}
}
/// Try to lower a vector shuffle as a shuffle of 128-bit subvectors.
static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(VT.getScalarSizeInBits() == 64 &&
"Unexpected element type size for 128bit shuffle.");
// Handling a 256-bit vector requires VLX, and lowerV2X128VectorShuffle() is
// most probably the better solution in that case.
assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
// TODO - use Zeroable like we do for lowerV2X128VectorShuffle?
SmallVector<int, 4> Widened128Mask;
if (!canWidenShuffleElements(Mask, Widened128Mask))
return SDValue();
assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch");
// Try to use an insert into a zero vector.
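// Zeroable & 0xf0 checks that elements 4-7 (the upper 256 bits) are zeroable;
// Zeroable & 0x0c checks elements 2-3 as well, in which case only the low
// 128 bits of V1 need to be inserted.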
if (Widened128Mask[0] == 0 && (Zeroable & 0xf0) == 0xf0 &&
(Widened128Mask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) {
unsigned NumElts = ((Zeroable & 0x0c) == 0x0c) ? 2 : 4;
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
getZeroVector(VT, Subtarget, DAG, DL), LoV,
DAG.getIntPtrConstant(0, DL));
}
// Check for patterns which can be matched with a single insert of a 256-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}, V1, V2);
if (OnlyUsesV1 ||
isShuffleEquivalent(Mask, {0, 1, 2, 3, 8, 9, 10, 11}, V1, V2)) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
SDValue SubVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,
DAG.getIntPtrConstant(4, DL));
}
// See if this is an insertion of the lower 128-bits of V2 into V1.
bool IsInsert = true;
int V2Index = -1;
for (int i = 0; i < 4; ++i) {
assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
if (Widened128Mask[i] < 0)
continue;
// Make sure all V1 subvectors are in place.
if (Widened128Mask[i] < 4) {
if (Widened128Mask[i] != i) {
IsInsert = false;
break;
}
} else {
// Make sure we only have a single V2 index and it's the lowest 128 bits.
if (V2Index >= 0 || Widened128Mask[i] != 4) {
IsInsert = false;
break;
}
V2Index = i;
}
}
if (IsInsert && V2Index >= 0) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
DAG.getIntPtrConstant(0, DL));
return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
}
// See if we can widen to a 256-bit lane shuffle; we're going to lose 128-lane
// UNDEF info by lowering to X86ISD::SHUF128 anyway, so by widening where
// possible we at least ensure the lanes stay sequential to help later
// combines.
SmallVector<int, 2> Widened256Mask;
if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) {
Widened128Mask.clear();
narrowShuffleMaskElts(2, Widened256Mask, Widened128Mask);
}
// Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
unsigned PermMask = 0;
// Ensure elements came from the same Op.
for (int i = 0; i < 4; ++i) {
assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
if (Widened128Mask[i] < 0)
continue;
SDValue Op = Widened128Mask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
else if (Ops[OpIndex] != Op)
return SDValue();
// Convert the 128-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
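// Each 2-bit field of the immediate selects one of the four 128-bit lanes of
// the operand feeding that result position.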
PermMask |= (Widened128Mask[i] % 4) << (i * 2);
}
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
/// Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (V2.isUndef()) {
// Use low duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1, V2))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) {
// Non-half-crossing single input shuffles can be lowered with an
// interleaved permutation.
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) |
((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
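// e.g. the in-lane swap mask <1, 0, 3, 2, 5, 4, 7, 6> yields 0b01010101.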
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
}
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask))
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
}
if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8f64, Mask, Zeroable, V1,
V2, Subtarget, DAG))
return Shuf128;
if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Unpck;
// Check if the blend happens to exactly fit that of SHUFPD.
if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Op;
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2,
DAG, Subtarget))
return V;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// If the shuffle mask is repeated in each 128-bit lane, we have many more
// options to efficiently lower the shuffle.
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
// Use even/odd duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1);
if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1);
if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
return V;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Otherwise, fall back to a SHUFPS sequence.
return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v16f32, V1, V2, Mask, Subtarget, DAG))
return V;
// If we have a single-input shuffle with different shuffle patterns in the
// 128-bit lanes that doesn't cross lanes, use a variable-mask VPERMILPS.
if (V2.isUndef() &&
!is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) {
SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask);
}
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower latency instructions that will operate on all four
// 128-bit lanes.
SmallVector<int, 2> Repeated128Mask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
SmallVector<int, 4> PSHUFDMask;
narrowShuffleMaskElts(2, Repeated128Mask, PSHUFDMask);
return DAG.getBitcast(
MVT::v8i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
DAG.getBitcast(MVT::v16i32, V1),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
SmallVector<int, 4> Repeated256Mask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask))
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1,
getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
}
if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8i64, Mask, Zeroable, V1,
V2, Subtarget, DAG))
return Shuf128;
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use VALIGN.
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Try to use PALIGNR.
if (Subtarget.hasBWI())
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2,
DAG, Subtarget))
return V;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the four 128-bit
// lanes.
SmallVector<int, 4> RepeatedMask;
bool Is128BitLaneRepeatedShuffle =
is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask);
if (Is128BitLaneRepeatedShuffle) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
return V;
}
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use VALIGN.
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v16i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Try to use byte rotation instructions.
if (Subtarget.hasBWI())
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Assume that a single SHUFPS is faster than using a permv shuffle.
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v16i32, ShufPS);
}
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
return V;
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2,
DAG, Subtarget))
return V;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
if (SDValue V =
lowerShuffleWithPACK(DL, MVT::v32i16, Mask, V1, V2, DAG, Subtarget))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i16, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
if (V2.isUndef()) {
// Try to use bit rotation instructions.
if (SDValue Rotate =
lowerShuffleAsBitRotate(DL, MVT::v32i16, V1, Mask, Subtarget, DAG))
return Rotate;
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v32 case.
return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v32i16, V1,
RepeatedMask, Subtarget, DAG);
}
}
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2,
Zeroable, Subtarget, DAG))
return PSHUFB;
return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
Subtarget))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v64i8, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
// Try to use bit rotation instructions.
if (V2.isUndef())
if (SDValue Rotate =
lowerShuffleAsBitRotate(DL, MVT::v64i8, V1, Mask, Subtarget, DAG))
return Rotate;
// Lower as AND if possible.
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Masked;
if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2,
Zeroable, Subtarget, DAG))
return PSHUFB;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;
if (SDValue Result = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v64i8, V1, V2, Mask, DAG, Subtarget))
return Result;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
if (!is128BitLaneCrossingShuffleMask(MVT::v64i8, Mask)) {
// Use PALIGNR+Permute if possible - permute might become PSHUFB but the
// PALIGNR will be cheaper than the second PSHUFB+OR.
if (SDValue V = lowerShuffleAsByteRotateAndPermute(DL, MVT::v64i8, V1, V2,
Mask, Subtarget, DAG))
return V;
// If we can't directly blend but can use PSHUFB, that will be better as it
// can both shuffle and set up the inefficient blend.
bool V1InUse, V2InUse;
return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v64i8, V1, V2, Mask, Zeroable,
DAG, V1InUse, V2InUse);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (!V2.isUndef())
if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return Result;
// VBMI can use VPERMV/VPERMV3 byte shuffles.
if (Subtarget.hasVBMI())
return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG);
return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
/// High-level routine to lower various 512-bit x86 vector shuffles.
///
/// This routine either breaks down the specific type of a 512-bit x86 vector
/// shuffle or splits it into two 256-bit shuffles and fuses the results back
/// together based on the available instructions.
static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/ basic ISA!");
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
int NumElts = Mask.size();
int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
if (NumV2Elements == 1 && Mask[0] >= NumElts)
if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Handle special cases where the lower or upper half is UNDEF.
if (SDValue V =
lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
Subtarget, DAG))
return Broadcast;
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) {
// Try using bit ops for masking and blending before falling back to
// splitting.
if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
Subtarget, DAG))
return V;
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
}
if (VT == MVT::v32f16) {
V1 = DAG.getBitcast(MVT::v32i16, V1);
V2 = DAG.getBitcast(MVT::v32i16, V2);
return DAG.getBitcast(MVT::v32f16,
DAG.getVectorShuffle(MVT::v32i16, DL, V1, V2, Mask));
}
// Dispatch to each element type for lowering. If we don't have support for
// specific element type shuffles at 512 bits, immediately split them and
// lower them. Each lowering routine of a given type is allowed to assume that
// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {
case MVT::v8f64:
return lowerV8F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16f32:
return lowerV16F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i64:
return lowerV8I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i32:
return lowerV16I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i16:
return lowerV32I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v64i8:
return lowerV64I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 512-bit x86 vector type!");
}
}
static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// Shuffle should be unary.
if (!V2.isUndef())
return SDValue();
int ShiftAmt = -1;
int NumElts = Mask.size();
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
assert((M == SM_SentinelUndef || (0 <= M && M < NumElts)) &&
"Unexpected mask index.");
if (M < 0)
continue;
// The first non-undef element determines our shift amount.
if (ShiftAmt < 0) {
ShiftAmt = M - i;
// Need to be shifting right.
if (ShiftAmt <= 0)
return SDValue();
}
// All non-undef elements must shift by the same amount.
if (ShiftAmt != M - i)
return SDValue();
}
assert(ShiftAmt >= 0 && "All undef?");
// Great, we found a shift right.
MVT WideVT = VT;
if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
DAG.getUNDEF(WideVT), V1,
DAG.getIntPtrConstant(0, DL));
Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res,
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
// Determine if this shuffle can be implemented with a KSHIFT instruction.
// Returns the shift amount if possible or -1 if not. This is a simplified
// version of matchShuffleAsShift.
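// Illustrative example (assuming MaskOffset == 0): Mask = <2, 3, -1, -1> with
// the top two elements zeroable matches KSHIFTR by 2, while Mask =
// <-1, -1, 0, 1> with the low two elements zeroable matches KSHIFTL by 2.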
static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
int MaskOffset, const APInt &Zeroable) {
int Size = Mask.size();
auto CheckZeros = [&](int Shift, bool Left) {
for (int j = 0; j < Shift; ++j)
if (!Zeroable[j + (Left ? 0 : (Size - Shift))])
return false;
return true;
};
auto MatchShift = [&](int Shift, bool Left) {
unsigned Pos = Left ? Shift : 0;
unsigned Low = Left ? 0 : Shift;
unsigned Len = Size - Shift;
return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset);
};
for (int Shift = 1; Shift != Size; ++Shift)
for (bool Left : {true, false})
if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) {
Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR;
return Shift;
}
return -1;
}
// Lower vXi1 vector shuffles.
// There is no dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to a SIMD
// vector, shuffle it, and then truncate it back.
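// For example (sketch only), a two-input v8i1 shuffle is handled roughly as:
// sign-extend both masks to v8i64 (or v8i32 with VLX), perform the ordinary
// wide vector_shuffle, then compare against zero or truncate to get back v8i1.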
static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/o basic ISA!");
int NumElts = Mask.size();
// Try to recognize shuffles that are just padding a subvector with zeros.
int SubvecElts = 0;
int Src = -1;
for (int i = 0; i != NumElts; ++i) {
if (Mask[i] >= 0) {
// Grab the source from the first valid mask element. All subsequent
// elements need to use this same source.
if (Src < 0)
Src = Mask[i] / NumElts;
if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i)
break;
}
++SubvecElts;
}
assert(SubvecElts != NumElts && "Identity shuffle?");
// Clip to a power of 2.
SubvecElts = PowerOf2Floor(SubvecElts);
// Make sure the number of zeroable bits in the top at least covers the bits
// not covered by the subvector.
if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
assert(Src >= 0 && "Expected a source!");
MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts);
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT,
Src == 0 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
DAG.getConstant(0, DL, VT),
Extract, DAG.getIntPtrConstant(0, DL));
}
// Try a simple shift right with undef elements. Later we'll try with zeros.
if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget,
DAG))
return Shift;
// Try to match KSHIFTs.
unsigned Offset = 0;
for (SDValue V : { V1, V2 }) {
unsigned Opcode;
int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
if (ShiftAmt >= 0) {
MVT WideVT = VT;
if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
DAG.getUNDEF(WideVT), V,
DAG.getIntPtrConstant(0, DL));
// Widened right shifts need two shifts to ensure we shift in zeroes.
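// For example, a v4i1 right shift by 1 widened to v16i1 first does KSHIFTL by
// 16 - 4 = 12 to park the live bits in the MSBs, then KSHIFTR by 12 + 1 = 13
// so only zeroes are shifted in from above.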
if (Opcode == X86ISD::KSHIFTR && WideVT != VT) {
int WideElts = WideVT.getVectorNumElements();
// Shift left to put the original vector in the MSBs of the new size.
Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res,
DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8));
// Increase the shift amount to account for the left shift.
ShiftAmt += WideElts - NumElts;
}
Res = DAG.getNode(Opcode, DL, WideVT, Res,
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
Offset += NumElts; // Increment for next iteration.
}
// If we're broadcasting a SETCC result, try to broadcast the ops instead.
// TODO: What other unary shuffles would benefit from this?
if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC &&
V1->hasOneUse()) {
SDValue Op0 = V1.getOperand(0);
SDValue Op1 = V1.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get();
EVT OpVT = Op0.getValueType();
return DAG.getSetCC(
DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
}
MVT ExtVT;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Expected a vector of i1 elements");
case MVT::v2i1:
ExtVT = MVT::v2i64;
break;
case MVT::v4i1:
ExtVT = MVT::v4i32;
break;
case MVT::v8i1:
// Take 512-bit type, more shuffles on KNL. If we have VLX use a 256-bit
// shuffle.
ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64;
break;
case MVT::v16i1:
// Take 512-bit type, unless we are avoiding 512-bit types and have the
// 256-bit operation available.
ExtVT = Subtarget.canExtendTo512DQ() ? MVT::v16i32 : MVT::v16i16;
break;
case MVT::v32i1:
// Take 512-bit type, unless we are avoiding 512-bit types and have the
// 256-bit operation available.
assert(Subtarget.hasBWI() && "Expected AVX512BW support");
ExtVT = Subtarget.canExtendTo512BW() ? MVT::v32i16 : MVT::v32i8;
break;
case MVT::v64i1:
// Fall back to scalarization. FIXME: We can do better if the shuffle
// can be partitioned cleanly.
if (!Subtarget.useBWIRegs())
return SDValue();
ExtVT = MVT::v64i8;
break;
}
V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask);
// The i1 elements were sign extended, so when supported we can compare
// against zero to convert back to a mask; otherwise fall back to a truncate.
int NumElems = VT.getVectorNumElements();
if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
(Subtarget.hasDQI() && (NumElems < 32)))
return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT),
Shuffle, ISD::SETGT);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
}
/// Helper function that returns true if the shuffle mask should be
/// commuted to improve canonicalization.
static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
int NumElements = Mask.size();
int NumV1Elements = 0, NumV2Elements = 0;
for (int M : Mask)
if (M < 0)
continue;
else if (M < NumElements)
++NumV1Elements;
else
++NumV2Elements;
// Commute the shuffle as needed such that more elements come from V1 than
// V2. This allows us to match the shuffle pattern strictly on how many
// elements come from V1 without handling the symmetric cases.
if (NumV2Elements > NumV1Elements)
return true;
assert(NumV1Elements > 0 && "No V1 indices");
if (NumV2Elements == 0)
return false;
// When the number of V1 and V2 elements are the same, try to minimize the
// number of uses of V2 in the low half of the vector. When that is tied,
// ensure that the sum of indices for V1 is equal to or lower than the sum of
// indices for V2. When those are equal, try to ensure that the number of odd
// indices for V1 is lower than the number of odd indices for V2.
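// Illustrative example: with Mask = <4, 1, 6, 3> each input supplies two
// elements and one element in the low half, but the V2 elements sit at
// positions {0, 2} (sum 2) vs. V1 at {1, 3} (sum 4), so the shuffle is
// commuted.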
if (NumV1Elements == NumV2Elements) {
int LowV1Elements = 0, LowV2Elements = 0;
for (int M : Mask.slice(0, NumElements / 2))
if (M >= NumElements)
++LowV2Elements;
else if (M >= 0)
++LowV1Elements;
if (LowV2Elements > LowV1Elements)
return true;
if (LowV2Elements == LowV1Elements) {
int SumV1Indices = 0, SumV2Indices = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= NumElements)
SumV2Indices += i;
else if (Mask[i] >= 0)
SumV1Indices += i;
if (SumV2Indices < SumV1Indices)
return true;
if (SumV2Indices == SumV1Indices) {
int NumV1OddIndices = 0, NumV2OddIndices = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= NumElements)
NumV2OddIndices += i % 2;
else if (Mask[i] >= 0)
NumV1OddIndices += i % 2;
if (NumV2OddIndices < NumV1OddIndices)
return true;
}
}
}
return false;
}
static bool canCombineAsMaskOperation(SDValue V1, SDValue V2,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX512())
return false;
MVT VT = V1.getSimpleValueType().getScalarType();
if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI())
return false;
// i8 is better widened to i16, because there is PBLENDW for vXi16
// when the vector bit size is 128 or 256.
if (VT == MVT::i8 && V1.getSimpleValueType().getSizeInBits() < 512)
return false;
auto HasMaskOperation = [&](SDValue V) {
// TODO: Currently we only check a limited set of opcodes. We could probably
// extend this to all binary operations by checking TLI.isBinOp().
switch (V->getOpcode()) {
default:
return false;
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::XOR:
break;
}
if (!V->hasOneUse())
return false;
return true;
};
if (HasMaskOperation(V1) || HasMaskOperation(V2))
return true;
return false;
}
// Forward declaration.
static SDValue canonicalizeShuffleMaskWithHorizOp(
MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget);
/// Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
/// vector shuffles. Most of the specific lowering strategies are encapsulated
/// above in helper routines. The canonicalization attempts to widen shuffles
/// to involve fewer lanes of wider elements, consolidate symmetric patterns
/// s.t. only one of the two inputs needs to be tested, etc.
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> OrigMask = SVOp->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
int NumElements = VT.getVectorNumElements();
SDLoc DL(Op);
bool Is1BitVector = (VT.getVectorElementType() == MVT::i1);
assert((VT.getSizeInBits() != 64 || Is1BitVector) &&
"Can't lower MMX shuffles");
bool V1IsUndef = V1.isUndef();
bool V2IsUndef = V2.isUndef();
if (V1IsUndef && V2IsUndef)
return DAG.getUNDEF(VT);
// When we create a shuffle node we put the UNDEF node as the second operand,
// but in some cases the first operand may be transformed to UNDEF.
// In this case we should just commute the node.
if (V1IsUndef)
return DAG.getCommutedVectorShuffle(*SVOp);
// Check for non-undef masks pointing at an undef vector and make the masks
// undef as well. This makes it easier to match the shuffle based solely on
// the mask.
if (V2IsUndef &&
any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
SmallVector<int, 8> NewMask(OrigMask);
for (int &M : NewMask)
if (M >= NumElements)
M = -1;
return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
}
// Check for illegal shuffle mask element index values.
int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
(void)MaskUpperLimit;
assert(llvm::all_of(OrigMask,
[&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
"Out of bounds shuffle index");
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
APInt KnownUndef, KnownZero;
computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);
APInt Zeroable = KnownUndef | KnownZero;
if (Zeroable.isAllOnes())
return getZeroVector(VT, Subtarget, DAG, DL);
bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode());
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits. It does not seem beneficial to form i128
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
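// For example, a v4i32 mask <0, 1, 6, 7> widens to the v2i64 mask <0, 3>,
// letting the 64-bit element lowering handle it with half as many elements.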
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
!canCombineAsMaskOperation(V1, V2, Subtarget) &&
canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) {
// Shuffle mask widening should not interfere with a broadcast opportunity
// by obfuscating the operands with bitcasts.
// TODO: Avoid lowering directly from this top-level function: make this
// a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask,
Subtarget, DAG))
return Broadcast;
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
: MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
int NewNumElts = NumElements / 2;
MVT NewVT = MVT::getVectorVT(NewEltVT, NewNumElts);
// Make sure that the new vector type is legal. For example, v2f64 isn't
// legal on SSE1.
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
if (V2IsZero) {
// Modify the new Mask to take all zeros from the all-zero vector.
// Choose indices that are blend-friendly.
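// For example, a widened v2i64 mask <0, SM_SentinelZero> becomes <0, 3>,
// taking lane 1 from the explicit all-zeros V2 materialized below.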
bool UsedZeroVector = false;
assert(is_contained(WidenedMask, SM_SentinelZero) &&
"V2's non-undef elements are used?!");
for (int i = 0; i != NewNumElts; ++i)
if (WidenedMask[i] == SM_SentinelZero) {
WidenedMask[i] = i + NewNumElts;
UsedZeroVector = true;
}
// Ensure all elements of V2 are zero - isBuildVectorAllZeros permits
// some elements to be undef.
if (UsedZeroVector)
V2 = getZeroVector(NewVT, Subtarget, DAG, DL);
}
V1 = DAG.getBitcast(NewVT, V1);
V2 = DAG.getBitcast(NewVT, V2);
return DAG.getBitcast(
VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask));
}
}
SmallVector<SDValue> Ops = {V1, V2};
SmallVector<int> Mask(OrigMask);
// Canonicalize the shuffle with any horizontal ops inputs.
// NOTE: This may update Ops and Mask.
if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))
return DAG.getBitcast(VT, HOp);
V1 = DAG.getBitcast(VT, Ops[0]);
V2 = DAG.getBitcast(VT, Ops[1]);
assert(NumElements == (int)Mask.size() &&
"canonicalizeShuffleMaskWithHorizOp "
"shouldn't alter the shuffle mask size");
// Commute the shuffle if it will improve canonicalization.
if (canonicalizeShuffleMaskWithCommute(Mask)) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
}
// For each vector width, delegate to a specialized lowering routine.
if (VT.is128BitVector())
return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
if (VT.is256BitVector())
return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
if (VT.is512BitVector())
return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
if (Is1BitVector)
return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
llvm_unreachable("Unimplemented!");
}
/// Try to lower a VSELECT instruction to a vector shuffle.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
// Only non-legal VSELECTs reach this lowering, convert those into generic
// shuffles and re-use the shuffle lowering path for blends.
if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
SmallVector<int, 32> Mask;
if (createShuffleMaskFromVSELECT(Mask, Cond))
return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask);
}
return SDValue();
}
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
if (isSoftFP16(VT)) {
MVT NVT = VT.changeVectorElementTypeToInteger();
return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, dl, NVT, Cond,
DAG.getBitcast(NVT, LHS),
DAG.getBitcast(NVT, RHS)));
}
// A vselect where all conditions and data are constants can be optimized into
// a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
return SDValue();
// Try to lower this to a blend-style vector shuffle. This can handle all
// constant condition cases.
if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
return BlendOp;
// If this VSELECT has a vector of i1 as a mask, it will be directly matched
// with patterns on the mask registers on AVX-512.
MVT CondVT = Cond.getSimpleValueType();
unsigned CondEltSize = Cond.getScalarValueSizeInBits();
if (CondEltSize == 1)
return Op;
// Variable blends are only legal from SSE4.1 onward.
if (!Subtarget.hasSSE41())
return SDValue();
unsigned EltSize = VT.getScalarSizeInBits();
unsigned NumElts = VT.getVectorNumElements();
// Expand v32i16/v64i8 without BWI.
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
return SDValue();
// If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
// into an i1 condition so that we can use the mask-based 512-bit blend
// instructions.
if (VT.getSizeInBits() == 512) {
// Build a mask by testing the condition against zero.
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
DAG.getConstant(0, dl, CondVT),
ISD::SETNE);
// Now return a new VSELECT using the mask.
return DAG.getSelect(dl, VT, Mask, LHS, RHS);
}
// SEXT/TRUNC cases where the mask doesn't match the destination size.
if (CondEltSize != EltSize) {
// If we don't have a sign splat, rely on the expansion.
if (CondEltSize != DAG.ComputeNumSignBits(Cond))
return SDValue();
MVT NewCondSVT = MVT::getIntegerVT(EltSize);
MVT NewCondVT = MVT::getVectorVT(NewCondSVT, NumElts);
Cond = DAG.getSExtOrTrunc(Cond, dl, NewCondVT);
return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS);
}
// Only some types will be legal on some subtargets. If we can emit a legal
// VSELECT-matching blend, return Op; if we need to expand, return a null
// value.
switch (VT.SimpleTy) {
default:
// Most of the vector types have blends past SSE4.1.
return Op;
case MVT::v32i8:
// The byte blends for AVX vectors were introduced only in AVX2.
if (Subtarget.hasAVX2())
return Op;
return SDValue();
case MVT::v8i16:
case MVT::v16i16: {
// Bitcast everything to the vXi8 type and use a vXi8 vselect.
MVT CastVT = MVT::getVectorVT(MVT::i8, NumElts * 2);
Cond = DAG.getBitcast(CastVT, Cond);
LHS = DAG.getBitcast(CastVT, LHS);
RHS = DAG.getBitcast(CastVT, RHS);
SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS);
return DAG.getBitcast(VT, Select);
}
}
}
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
assert(isa<ConstantSDNode>(Idx) && "Constant index expected");
SDLoc dl(Op);
if (!Vec.getSimpleValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless
// we're going to zero extend the register or fold the store.
if (llvm::isNullConstant(Idx) && !X86::mayFoldIntoZeroExtend(Op) &&
!X86::mayFoldIntoStore(Op))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
}
if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to an FR32 register. It's only worth matching if the
// result has a single use which is a store or a bitcast to i32. And in
// the case of a store, it's not worth it if the index is a constant 0,
// because a MOVSSmr can be used instead, which is smaller and faster.
if (!Op.hasOneUse())
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
if ((User->getOpcode() != ISD::STORE || isNullConstant(Idx)) &&
(User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx);
return DAG.getBitcast(MVT::f32, Extract);
}
if (VT == MVT::i32 || VT == MVT::i64)
return Op;
return SDValue();
}
/// Extract one bit from mask vector, like v16i1 or v8i1.
/// AVX-512 feature.
static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Vec = Op.getOperand(0);
SDLoc dl(Vec);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
MVT EltVT = Op.getSimpleValueType();
assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
"Unexpected vector type in ExtractBitFromMaskVector");
// A variable index can't be handled in mask registers,
// so extend the vector to VR512/VR128.
if (!IdxC) {
unsigned NumElts = VecVT.getVectorNumElements();
// Extending v8i1/v16i1 to 512 bits gets better performance on KNL
// than extending to 128/256 bits.
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
unsigned IdxVal = IdxC->getZExtValue();
if (IdxVal == 0) // the operation is legal
return Op;
// Extend to natively supported kshift.
unsigned NumElems = VecVT.getVectorNumElements();
MVT WideVecVT = VecVT;
if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
DAG.getUNDEF(WideVecVT), Vec,
DAG.getIntPtrConstant(0, dl));
}
// Use kshiftr instruction to move to the lower element.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
}
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
if (VecVT.getVectorElementType() == MVT::i1)
return ExtractBitFromMaskVector(Op, DAG, Subtarget);
if (!IdxC) {
// It's more profitable to go through memory (1 cycle throughput)
// than using a VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput).
// The IACA tool was used to get the performance estimation
// (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer)
//
// example : extractelement <16 x i8> %a, i32 %i
//
// Block Throughput: 3.00 Cycles
// Throughput Bottleneck: Port5
//
// | Num Of | Ports pressure in cycles | |
// | Uops | 0 - DV | 5 | 6 | 7 | |
// ---------------------------------------------
// | 1 | | 1.0 | | | CP | vmovd xmm1, edi
// | 1 | | 1.0 | | | CP | vpshufb xmm0, xmm0, xmm1
// | 2 | 1.0 | 1.0 | | | CP | vpextrb eax, xmm0, 0x0
// Total Num Of Uops: 4
//
//
// Block Throughput: 1.00 Cycles
// Throughput Bottleneck: PORT2_AGU, PORT3_AGU, Port4
//
// | | Ports pressure in cycles | |
// |Uops| 1 | 2 - D |3 - D | 4 | 5 | |
// ---------------------------------------------------------
// |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0
// |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18]
// |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1]
// Total Num Of Uops: 4
return SDValue();
}
unsigned IdxVal = IdxC->getZExtValue();
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
// Get the 128-bit vector.
Vec = extract128BitVector(Vec, IdxVal, DAG, dl);
MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
// this can be done with a mask.
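// For example, extracting element 13 from a v16i32: each 128-bit chunk holds
// 4 elements, so the index within the extracted chunk is 13 & 3 == 1.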
IdxVal &= ElemsPerChunk - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(IdxVal, dl));
}
assert(VecVT.is128BitVector() && "Unexpected vector length");
MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
// we're going to zero extend the register or fold the store (SSE41 only).
if (IdxVal == 0 && !X86::mayFoldIntoZeroExtend(Op) &&
!(Subtarget.hasSSE41() && X86::mayFoldIntoStore(Op))) {
if (Subtarget.hasFP16())
return Op;
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
}
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
}
if (Subtarget.hasSSE41())
if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
// TODO: We only extract a single element from v16i8; we can probably afford
// to be more aggressive here before using the default approach of spilling to
// stack.
if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
// Extract either the lowest i32 or any i16, and extract the sub-byte.
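// For example, byte 3 comes from the low i32 shifted right by 24, while
// byte 6 comes from i16 element 3 with no shift needed.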
int DWordIdx = IdxVal / 4;
if (DWordIdx == 0) {
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec),
DAG.getIntPtrConstant(DWordIdx, dl));
int ShiftVal = (IdxVal % 4) * 8;
if (ShiftVal != 0)
Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res,
DAG.getConstant(ShiftVal, dl, MVT::i8));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
int WordIdx = IdxVal / 2;
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
DAG.getBitcast(MVT::v8i16, Vec),
DAG.getIntPtrConstant(WordIdx, dl));
int ShiftVal = (IdxVal % 2) * 8;
if (ShiftVal != 0)
Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
DAG.getConstant(ShiftVal, dl, MVT::i8));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
if (VT == MVT::f16 || VT.getSizeInBits() == 32) {
if (IdxVal == 0)
return Op;
// Shuffle the element to the lowest element, then movss or movsh.
SmallVector<int, 8> Mask(VecVT.getVectorNumElements(), -1);
Mask[0] = static_cast<int>(IdxVal);
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0, dl));
}
if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
if (IdxVal == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0, dl));
}
return SDValue();
}
/// Insert one bit to mask vector, like v16i1 or v8i1.
/// AVX-512 feature.
static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue Elt = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
MVT VecVT = Vec.getSimpleValueType();
if (!isa<ConstantSDNode>(Idx)) {
// Non constant index. Extend source and destination,
// insert element and then truncate the result.
unsigned NumElts = VecVT.getVectorNumElements();
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
}
// Copy into a k-register, extract to v1i1 and insert_subvector.
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec, EltInVec, Idx);
}
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
if (EltVT == MVT::i1)
return InsertBitToMaskVector(Op, DAG, Subtarget);
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
auto *N2C = dyn_cast<ConstantSDNode>(N2);
if (!N2C) {
// For variable insertion indices we're usually better off spilling to stack,
// but AVX512 can use a variable compare+select by comparing against all
// possible vector indices, and FP insertion has less gpr->simd traffic.
if (!(Subtarget.hasBWI() ||
(Subtarget.hasAVX512() && EltSizeInBits >= 32) ||
(Subtarget.hasSSE41() && (EltVT == MVT::f32 || EltVT == MVT::f64))))
return SDValue();
MVT IdxSVT = MVT::getIntegerVT(EltSizeInBits);
MVT IdxVT = MVT::getVectorVT(IdxSVT, NumElts);
if (!isTypeLegal(IdxSVT) || !isTypeLegal(IdxVT))
return SDValue();
SDValue IdxExt = DAG.getZExtOrTrunc(N2, dl, IdxSVT);
SDValue IdxSplat = DAG.getSplatBuildVector(IdxVT, dl, IdxExt);
SDValue EltSplat = DAG.getSplatBuildVector(VT, dl, N1);
SmallVector<SDValue, 16> RawIndices;
for (unsigned I = 0; I != NumElts; ++I)
RawIndices.push_back(DAG.getConstant(I, dl, IdxSVT));
SDValue Indices = DAG.getBuildVector(IdxVT, dl, RawIndices);
// inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.
return DAG.getSelectCC(dl, IdxSplat, Indices, EltSplat, N0,
ISD::CondCode::SETEQ);
}
if (N2C->getAPIntValue().uge(NumElts))
return SDValue();
uint64_t IdxVal = N2C->getZExtValue();
bool IsZeroElt = X86::isZeroNode(N1);
bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
if (IsZeroElt || IsAllOnesElt) {
// Lower insertion of v16i8/v32i8/v16i16 -1 elts as an 'OR' blend.
// We don't deal with i8 0 since it appears to be handled elsewhere.
if (IsAllOnesElt &&
((VT == MVT::v16i8 && !Subtarget.hasSSE41()) ||
((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) {
SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType());
SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType());
SmallVector<SDValue, 8> CstVectorElts(NumElts, ZeroCst);
CstVectorElts[IdxVal] = OnesCst;
SDValue CstVector = DAG.getBuildVector(VT, dl, CstVectorElts);
return DAG.getNode(ISD::OR, dl, VT, N0, CstVector);
}
// See if we can do this more efficiently with a blend shuffle with a
// rematerializable vector.
if (Subtarget.hasSSE41() &&
(EltSizeInBits >= 16 || (IsZeroElt && !VT.is128BitVector()))) {
SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
BlendMask.push_back(i == IdxVal ? i + NumElts : i);
SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
: getOnesVector(VT, DAG, dl);
return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
}
}
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
// into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) {
// With a 256-bit vector, we can insert into the zero element efficiently
// using a blend if we have AVX or AVX2 and the right data type.
if (VT.is256BitVector() && IdxVal == 0) {
// TODO: It is worthwhile to cast integer to floating point and back
// and incur a domain crossing penalty if that's what we'll end up
// doing anyway after extracting to a 128-bit vector.
if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
(Subtarget.hasAVX2() && (EltVT == MVT::i32 || EltVT == MVT::i64))) {
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
DAG.getTargetConstant(1, dl, MVT::i8));
}
}
unsigned NumEltsIn128 = 128 / EltSizeInBits;
assert(isPowerOf2_32(NumEltsIn128) &&
"Vectors will always have power-of-two number of elements.");
// If we are not inserting into the low 128-bit vector chunk,
// then prefer the broadcast+blend sequence.
// FIXME: relax the profitability check iff all N1 uses are insertions.
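// For example (illustrative), inserting into element 5 of a v8i32 with AVX2
// splats N1 and blends with the mask <0, 1, 2, 3, 4, 13, 6, 7>.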
if (IdxVal >= NumEltsIn128 &&
((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
(Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
X86::mayFoldLoad(N1, Subtarget)))) {
SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1);
SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
BlendMask.push_back(i == IdxVal ? i + NumElts : i);
return DAG.getVectorShuffle(VT, dl, N0, N1SplatVec, BlendMask);
}
// Get the desired 128-bit vector chunk.
SDValue V = extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired chunk.
// Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getIntPtrConstant(IdxIn128, dl));
// Insert the changed part back into the bigger vector
return insert128BitVector(N0, V, IdxVal, DAG, dl);
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
// This will be just movw/movd/movq/movsh/movss/movsd.
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode())) {
if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
EltVT == MVT::f16 || EltVT == MVT::i64) {
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
}
// We can't directly insert an i8 or i16 into a vector, so zero extend
// it to i32 first.
if (EltVT == MVT::i16 || EltVT == MVT::i8) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, N1);
MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, N1);
N1 = getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
return DAG.getBitcast(VT, N1);
}
}
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
// argument. SSE41 is required for pinsrb.
if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
unsigned Opc;
if (VT == MVT::v8i16) {
assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW");
Opc = X86ISD::PINSRW;
} else {
assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB");
Opc = X86ISD::PINSRB;
}
assert(N1.getValueType() != MVT::i32 && "Unexpected VT");
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
N2 = DAG.getTargetConstant(IdxVal, dl, MVT::i8);
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
}
if (Subtarget.hasSSE41()) {
if (EltVT == MVT::f32) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into
// these bits. For example (insert (extract, 3), 2) could be matched by
// putting the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
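// For example, inserting into element 2 yields the immediate IdxVal << 4 ==
// 0x20: source select 0, destination select 2, and an empty zero mask.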
bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
if (IdxVal == 0 && (!MinSize || !X86::mayFoldLoad(N1, Subtarget))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
// than an insertps. Blends are simpler operations in hardware and so
// will always have equal or better performance than insertps.
// But if optimizing for size and there's a load folding opportunity,
// generate insertps because blendps does not have a 32-bit memory
// operand form.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1,
DAG.getTargetConstant(1, dl, MVT::i8));
}
// Create this as a scalar to vector.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1,
DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8));
}
// PINSR* works with constant index.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return Op;
}
return SDValue();
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT OpVT = Op.getSimpleValueType();
// It's always cheaper to replace a xor+movd with xorps, and it simplifies
// further combines.
if (X86::isZeroNode(Op.getOperand(0)))
return getZeroVector(OpVT, Subtarget, DAG, dl);
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
unsigned SizeFactor = OpVT.getSizeInBits() / 128;
MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
// Insert the 128-bit vector.
return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
assert(OpVT.is128BitVector() && OpVT.isInteger() && OpVT != MVT::v2i64 &&
"Expected an SSE type!");
// Pass through a v4i32 or v8i16 SCALAR_TO_VECTOR as that's what we use in
// tblgen.
if (OpVT == MVT::v4i32 || (OpVT == MVT::v8i16 && Subtarget.hasFP16()))
return Op;
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
return DAG.getBitcast(
OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
}
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
// simple superregister reference or explicit instructions to insert
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1);
return insert1BitVector(Op, DAG, Subtarget);
}
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Only vXi1 extract_subvectors need custom lowering");
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
uint64_t IdxVal = Op.getConstantOperandVal(1);
if (IdxVal == 0) // the operation is legal
return Op;
MVT VecVT = Vec.getSimpleValueType();
unsigned NumElems = VecVT.getVectorNumElements();
// Extend to natively supported kshift.
MVT WideVecVT = VecVT;
if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
DAG.getUNDEF(WideVecVT), Vec,
DAG.getIntPtrConstant(0, dl));
}
// Shift to the LSB.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
}
// Returns the appropriate wrapper opcode for a global reference.
unsigned X86TargetLowering::getGlobalWrapperKind(
const GlobalValue *GV, const unsigned char OpFlags) const {
// References to absolute symbols are never PC-relative.
if (GV && GV->isAbsoluteSymbolRef())
return X86ISD::Wrapper;
CodeModel::Model M = getTargetMachine().getCodeModel();
if (Subtarget.isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
return X86ISD::WrapperRIP;
// In the medium model, functions can always be referenced RIP-relatively,
// since they must be within 2GiB. This is also possible in non-PIC mode, and
// shorter than the 64-bit absolute immediate that would otherwise be emitted.
if (M == CodeModel::Medium && isa_and_nonnull<Function>(GV))
return X86ISD::WrapperRIP;
// GOTPCREL references must always use RIP.
if (OpFlags == X86II::MO_GOTPCREL || OpFlags == X86II::MO_GOTPCREL_NORELAX)
return X86ISD::WrapperRIP;
return X86ISD::Wrapper;
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing modes. These wrapped nodes will be selected
// into MOV32ri.
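// For example (illustrative), on x86-64 with the small code model a wrapped
// constant-pool reference typically selects to a RIP-relative address, while
// 32-bit PIC instead adds the global base register to the wrapped address
// (the "$g + Offset" form noted below).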
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetConstantPool(
CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag);
SDLoc DL(CP);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
return Result;
}
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
SDLoc DL(JT);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag)
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
return Result;
}
SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
unsigned char OpFlags =
Subtarget.classifyBlockAddressReference();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
return Result;
}
/// Creates target global address or external symbol nodes for calls or
/// other uses.
SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
bool ForCall) const {
// Unpack the global address or external symbol.
const SDLoc &dl = SDLoc(Op);
const GlobalValue *GV = nullptr;
int64_t Offset = 0;
const char *ExternalSym = nullptr;
if (const auto *G = dyn_cast<GlobalAddressSDNode>(Op)) {
GV = G->getGlobal();
Offset = G->getOffset();
} else {
const auto *ES = cast<ExternalSymbolSDNode>(Op);
ExternalSym = ES->getSymbol();
}
// Calculate some flags for address lowering.
const Module &Mod = *DAG.getMachineFunction().getFunction().getParent();
unsigned char OpFlags;
if (ForCall)
OpFlags = Subtarget.classifyGlobalFunctionReference(GV, Mod);
else
OpFlags = Subtarget.classifyGlobalReference(GV, Mod);
bool HasPICReg = isGlobalRelativeToPICBase(OpFlags);
bool NeedsLoad = isGlobalStubReference(OpFlags);
CodeModel::Model M = DAG.getTarget().getCodeModel();
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (GV) {
// Create a target global address if this is a global. If possible, fold the
// offset into the global address reference. Otherwise, ADD it on later.
// Suppress the folding if Offset is negative: movl foo-1, %eax is not
// allowed because if the address of foo is 0, the ELF R_X86_64_32
// relocation will compute to a negative value, which is invalid.
int64_t GlobalOffset = 0;
if (OpFlags == X86II::MO_NO_FLAG && Offset >= 0 &&
X86::isOffsetSuitableForCodeModel(Offset, M, true)) {
std::swap(GlobalOffset, Offset);
}
Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GlobalOffset, OpFlags);
} else {
// If this is not a global address, this must be an external symbol.
Result = DAG.getTargetExternalSymbol(ExternalSym, PtrVT, OpFlags);
}
// If this is a direct call, avoid the wrapper if we don't need to do any
// loads or adds. This allows SDAG ISel to match direct calls.
if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
return Result;
Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (HasPICReg) {
Result = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
// For globals that require a load from a stub to get the address, emit the
// load.
if (NeedsLoad)
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result,
DAG.getConstant(Offset, dl, PtrVT));
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags, bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
} else {
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
}
// TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
MFI.setAdjustsStack(true);
MFI.setHasCalls(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
SDValue InFlag;
SDLoc dl(GA); // ? function entry point might be better
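// The 32-bit general-dynamic sequence expects the GOT address in %ebx; the
// TLS address comes back in %eax.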
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
X86::RAX, X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
static SDValue
LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
X86::EAX, X86II::MO_TLSGD);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG, const EVT PtrVT,
bool Is64Bit, bool Is64BitLP64) {
SDLoc dl(GA);
// Get the start address of the TLS block for this module.
X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
.getInfo<X86MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
SDValue Base;
if (Is64Bit) {
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
} else {
SDValue InFlag;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
// of Base.
// Build x@dtpoff.
unsigned char OperandFlags = X86II::MO_DTPOFF;
unsigned WrapperKind = X86ISD::Wrapper;
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
// Add x@dtpoff with the base.
return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
bool is64Bit, bool isPIC) {
SDLoc dl(GA);
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
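// Address space 256 is %gs and 257 is %fs on x86, so this load reads the
// thread pointer at %gs:0 or %fs:0.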
SDValue ThreadPointer =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
MachinePointerInfo(Ptr));
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
// initial exec.
unsigned WrapperKind = X86ISD::Wrapper;
if (model == TLSModel::LocalExec) {
OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
} else if (model == TLSModel::InitialExec) {
if (is64Bit) {
OperandFlags = X86II::MO_GOTTPOFF;
WrapperKind = X86ISD::WrapperRIP;
} else {
OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
}
} else {
llvm_unreachable("Unexpected model");
}
// emit "addl x@ntpoff,%eax" (local exec)
// or "addl x@indntpoff,%eax" (initial exec)
// or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
SDValue TGA =
DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
if (isPIC && !is64Bit) {
Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
const GlobalValue *GV = GA->getGlobal();
auto PtrVT = getPointerTy(DAG.getDataLayout());
bool PositionIndependent = isPositionIndependent();
if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
if (Subtarget.is64Bit()) {
if (Subtarget.isTarget64BitLP64())
return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
}
return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
case TLSModel::LocalDynamic:
return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
Subtarget.isTarget64BitLP64());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
PositionIndependent);
}
llvm_unreachable("Unknown TLS model.");
}
if (Subtarget.isTargetDarwin()) {
// Darwin only has one model of TLS. Lower to that.
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = PositionIndependent && !Subtarget.is64Bit();
if (PIC32)
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
SDLoc DL(Op);
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
GA->getValueType(0),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC32, the address is actually $g + Offset.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
// Lowering the machine isd will make sure everything is in the right
// location.
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
SDValue Args[] = { Chain, Offset };
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), DL);
// TLSCALL will be codegen'ed as a call. Inform MFI that the function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
// And our return value (tls address) is in the standard call return value
// location.
unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
}
if (Subtarget.isOSWindows()) {
// Just use the implicit TLS architecture
// Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
// ; from TEB
// mov ecx, dword [rel _tls_index]: Load index (from C runtime)
// mov rcx, qword [rdx+rcx*8]
// mov eax, .tls$:tlsvar
// [rax+rcx] contains the address
// Windows 64bit: gs:0x58
// Windows 32bit: fs:__tls_array
SDLoc dl(GA);
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
// %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
// use its literal value of 0x2C.
Value *Ptr = Constant::getNullValue(Subtarget.is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
SDValue TlsArray = Subtarget.is64Bit()
? DAG.getIntPtrConstant(0x58, dl)
: (Subtarget.isTargetWindowsGNU()
? DAG.getIntPtrConstant(0x2C, dl)
: DAG.getExternalSymbol("_tls_array", PtrVT));
SDValue ThreadPointer =
DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr));
SDValue res;
if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) {
res = ThreadPointer;
} else {
// Load the _tls_index variable
SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
if (Subtarget.is64Bit())
IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX,
MachinePointerInfo(), MVT::i32);
else
IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo());
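// Scale _tls_index by the pointer size to index the array of per-module TLS
// blocks; the selected entry is loaded below to get this module's block.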
const DataLayout &DL = DAG.getDataLayout();
SDValue Scale =
DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, MVT::i8);
IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale);
res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX);
}
res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo());
// Get the offset of start of .tls section
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), X86II::MO_SECREL);
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset);
}
llvm_unreachable("TLS not implemented for this target.");
}
/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
/// TODO: Can this be moved to general expansion code?
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue Lo, Hi;
DAG.getTargetLoweringInfo().expandShiftParts(Op.getNode(), Lo, Hi, DAG);
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
// Try to use a packed vector operation to handle i64 on 32-bit targets when
// AVX512DQ is enabled.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert((Op.getOpcode() == ISD::SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
Op.getOpcode() == ISD::UINT_TO_FP) &&
"Unexpected opcode!");
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
(VT != MVT::f32 && VT != MVT::f64))
return SDValue();
// Pack the i64 into a vector, do the operation and extract.
// Using 256-bit to ensure the result is 128 bits for the f32 case.
unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
MVT VecInVT = MVT::getVectorVT(MVT::i64, NumElts);
MVT VecVT = MVT::getVectorVT(VT, NumElts);
SDLoc dl(Op);
SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
if (IsStrict) {
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
{Op.getOperand(0), InVec});
SDValue Chain = CvtVec.getValue(1);
SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
return DAG.getMergeValues({Value, Chain}, dl);
}
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
}
// Try to use a packed vector operation to handle i64 on 32-bit targets.
static SDValue LowerI64IntToFP16(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert((Op.getOpcode() == ISD::SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
Op.getOpcode() == ISD::UINT_TO_FP) &&
"Unexpected opcode!");
bool IsStrict = Op->isStrictFPOpcode();
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
return SDValue();
// Pack the i64 into a vector, do the operation and extract.
assert(Subtarget.hasFP16() && "Expected FP16");
SDLoc dl(Op);
SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
if (IsStrict) {
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
{Op.getOperand(0), InVec});
SDValue Chain = CvtVec.getValue(1);
SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
return DAG.getMergeValues({Value, Chain}, dl);
}
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, MVT::v2f16, InVec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
}
static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
const X86Subtarget &Subtarget) {
switch (Opcode) {
case ISD::SINT_TO_FP:
// TODO: Handle wider types with AVX/AVX512.
if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32)
return false;
// CVTDQ2PS or (V)CVTDQ2PD
return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64);
case ISD::UINT_TO_FP:
// TODO: Handle wider types and i64 elements.
if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32)
return false;
// VCVTUDQ2PS or VCVTUDQ2PD
return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
default:
return false;
}
}
/// Given a scalar cast operation that is extracted from a vector, try to
/// vectorize the cast op followed by extraction. This will avoid an expensive
/// round-trip between XMM and GPR.
static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// TODO: This could be enhanced to handle smaller integer types by peeking
// through an extend.
SDValue Extract = Cast.getOperand(0);
MVT DestVT = Cast.getSimpleValueType();
if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Extract.getOperand(1)))
return SDValue();
// See if we have a 128-bit vector cast op for this type of cast.
SDValue VecOp = Extract.getOperand(0);
MVT FromVT = VecOp.getSimpleValueType();
unsigned NumEltsInXMM = 128 / FromVT.getScalarSizeInBits();
MVT Vec128VT = MVT::getVectorVT(FromVT.getScalarType(), NumEltsInXMM);
MVT ToVT = MVT::getVectorVT(DestVT, NumEltsInXMM);
if (!useVectorCast(Cast.getOpcode(), Vec128VT, ToVT, Subtarget))
return SDValue();
// If we are extracting from a non-zero element, first shuffle the source
// vector to allow extracting from element zero.
SDLoc DL(Cast);
if (!isNullConstant(Extract.getOperand(1))) {
SmallVector<int, 16> Mask(FromVT.getVectorNumElements(), -1);
Mask[0] = Extract.getConstantOperandVal(1);
VecOp = DAG.getVectorShuffle(FromVT, DL, VecOp, DAG.getUNDEF(FromVT), Mask);
}
// If the source vector is wider than 128-bits, extract the low part. Do not
// create an unnecessarily wide vector cast op.
if (FromVT != Vec128VT)
VecOp = extract128BitVector(VecOp, 0, DAG, DL);
// cast (extelt V, 0) --> extelt (cast (extract_subv V)), 0
// cast (extelt V, C) --> extelt (cast (extract_subv (shuffle V, [C...]))), 0
SDValue VCast = DAG.getNode(Cast.getOpcode(), DL, ToVT, VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestVT, VCast,
DAG.getIntPtrConstant(0, DL));
}
/// Given a scalar cast to FP with a cast to integer operand (almost an ftrunc),
/// try to vectorize the cast ops. This will avoid an expensive round-trip
/// between XMM and GPR.
static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// TODO: Allow FP_TO_UINT.
SDValue CastToInt = CastToFP.getOperand(0);
MVT VT = CastToFP.getSimpleValueType();
if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
return SDValue();
MVT IntVT = CastToInt.getSimpleValueType();
SDValue X = CastToInt.getOperand(0);
MVT SrcVT = X.getSimpleValueType();
if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
return SDValue();
// See if we have 128-bit vector cast instructions for this type of cast.
// We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
IntVT != MVT::i32)
return SDValue();
unsigned SrcSize = SrcVT.getSizeInBits();
unsigned IntSize = IntVT.getSizeInBits();
unsigned VTSize = VT.getSizeInBits();
MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);
// We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
unsigned ToIntOpcode =
SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
unsigned ToFPOpcode =
IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
//
// We are not defining the high elements (for example, zero them) because
// that could nullify any performance advantage that we hoped to gain from
// this vector op hack. We do not expect any adverse effects (like denorm
// penalties) with cast ops.
SDLoc DL(CastToFP);
SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, X);
SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecVT, VCastToInt);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
}
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
bool IsStrict = Op->isStrictFPOpcode();
MVT VT = Op->getSimpleValueType(0);
SDValue Src = Op->getOperand(IsStrict ? 1 : 0);
if (Subtarget.hasDQI()) {
assert(!Subtarget.hasVLX() && "Unexpected features");
assert((Src.getSimpleValueType() == MVT::v2i64 ||
Src.getSimpleValueType() == MVT::v4i64) &&
"Unsupported custom type");
// With AVX512DQ, but not VLX we need to widen to get a 512-bit result type.
assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
"Unexpected VT!");
MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
// Need to concat with zero vector for strict fp to avoid spurious
// exceptions.
SDValue Tmp = IsStrict ? DAG.getConstant(0, DL, MVT::v8i64)
: DAG.getUNDEF(MVT::v8i64);
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src,
DAG.getIntPtrConstant(0, DL));
SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other},
{Op->getOperand(0), Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src);
}
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, DL);
return Res;
}
bool IsSigned = Op->getOpcode() == ISD::SINT_TO_FP ||
Op->getOpcode() == ISD::STRICT_SINT_TO_FP;
if (VT != MVT::v4f32 || IsSigned)
return SDValue();
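// Unsigned v4i64 -> v4f32 without DQI: elements with the sign bit set would
// be misread by a signed conversion, so halve them with a round-to-odd shift
// ((Src >> 1) | (Src & 1)), convert each element with a scalar signed
// conversion, and double the result with an FADD; other elements use the
// signed conversion result directly.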
SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64);
SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64,
DAG.getNode(ISD::SRL, DL, MVT::v4i64, Src, One),
DAG.getNode(ISD::AND, DL, MVT::v4i64, Src, One));
SDValue IsNeg = DAG.getSetCC(DL, MVT::v4i64, Src, Zero, ISD::SETLT);
SDValue SignSrc = DAG.getSelect(DL, MVT::v4i64, IsNeg, Sign, Src);
SmallVector<SDValue, 4> SignCvts(4);
SmallVector<SDValue, 4> Chains(4);
for (int i = 0; i != 4; ++i) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc,
DAG.getIntPtrConstant(i, DL));
if (IsStrict) {
SignCvts[i] =
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {MVT::f32, MVT::Other},
{Op.getOperand(0), Elt});
Chains[i] = SignCvts[i].getValue(1);
} else {
SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f32, Elt);
}
}
SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts);
SDValue Slow, Chain;
if (IsStrict) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
Slow = DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v4f32, MVT::Other},
{Chain, SignCvt, SignCvt});
Chain = Slow.getValue(1);
} else {
Slow = DAG.getNode(ISD::FADD, DL, MVT::v4f32, SignCvt, SignCvt);
}
IsNeg = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i32, IsNeg);
SDValue Cvt = DAG.getSelect(DL, MVT::v4f32, IsNeg, Slow, SignCvt);
if (IsStrict)
return DAG.getMergeValues({Cvt, Chain}, DL);
return Cvt;
}
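// Promote an integer-to-FP conversion whose result is a soft f16 type:
// perform the conversion at f32 (per element for vectors) and round the
// result back down with FP_ROUND.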
static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
bool IsStrict = Op->isStrictFPOpcode();
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
MVT VT = Op.getSimpleValueType();
MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
SDLoc dl(Op);
SDValue Rnd = DAG.getIntPtrConstant(0, dl);
if (IsStrict)
return DAG.getNode(
ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
{Chain,
DAG.getNode(Op.getOpcode(), dl, {NVT, MVT::Other}, {Chain, Src}),
Rnd});
return DAG.getNode(ISD::FP_ROUND, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd);
}
static bool isLegalConversion(MVT VT, bool IsSigned,
const X86Subtarget &Subtarget) {
if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned)
return true;
if (VT == MVT::v8i32 && Subtarget.hasAVX() && IsSigned)
return true;
if (Subtarget.hasVLX() && (VT == MVT::v4i32 || VT == MVT::v8i32))
return true;
if (Subtarget.useAVX512Regs()) {
if (VT == MVT::v16i32)
return true;
if (VT == MVT::v8i64 && Subtarget.hasDQI())
return true;
}
if (Subtarget.hasDQI() && Subtarget.hasVLX() &&
(VT == MVT::v2i64 || VT == MVT::v4i64))
return true;
return false;
}
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
if (isSoftFP16(VT))
return promoteXINT_TO_FP(Op, DAG);
else if (isLegalConversion(SrcVT, true, Subtarget))
return Op;
if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
return LowerWin64_INT128_TO_FP(Op, DAG);
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
if (SDValue R = lowerFPToIntToFP(Op, DAG, Subtarget))
return R;
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
// Note: Since v2f64 is a legal type, we don't need to zero extend the
// source for strict FP.
if (IsStrict)
return DAG.getNode(
X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
{Chain, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT))});
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64)
return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget);
return SDValue();
}
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
bool UseSSEReg = isScalarFPTypeInSSEReg(VT);
// These are really Legal; return the operand so the caller accepts it as
// Legal.
if (SrcVT == MVT::i32 && UseSSEReg)
return Op;
if (SrcVT == MVT::i64 && UseSSEReg && Subtarget.is64Bit())
return Op;
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
return V;
// SSE doesn't have an i16 conversion so we need to promote.
if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
{Chain, Ext});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
}
if (VT == MVT::f128 || !Subtarget.hasX87())
return SDValue();
SDValue ValueToStore = Src;
if (SrcVT == MVT::i64 && Subtarget.hasSSE2() && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
unsigned Size = SrcVT.getStoreSize();
Align Alignment(Size);
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false);
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Chain = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Alignment);
std::pair<SDValue, SDValue> Tmp =
BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
return Tmp.first;
}
std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(
EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer,
MachinePointerInfo PtrInfo, Align Alignment, SelectionDAG &DAG) const {
// Build the FILD
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(DstVT);
if (useSSE)
Tys = DAG.getVTList(MVT::f80, MVT::Other);
else
Tys = DAG.getVTList(DstVT, MVT::Other);
SDValue FILDOps[] = {Chain, Pointer};
SDValue Result =
DAG.getMemIntrinsicNode(X86ISD::FILD, DL, Tys, FILDOps, SrcVT, PtrInfo,
Alignment, MachineMemOperand::MOLoad);
Chain = Result.getValue(1);
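// FILD produces its result in an x87 register, so when the destination is an
// SSE-class type, spill the f80 value to a stack slot and reload it in the
// destination type.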
if (useSSE) {
MachineFunction &MF = DAG.getMachineFunction();
unsigned SSFISize = DstVT.getStoreSize();
int SSFI =
MF.getFrameInfo().CreateStackObject(SSFISize, Align(SSFISize), false);
auto PtrVT = getPointerTy(MF.getDataLayout());
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Tys = DAG.getVTList(MVT::Other);
SDValue FSTOps[] = {Chain, Result, StackSlot};
MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOStore, SSFISize, Align(SSFISize));
Chain =
DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, FSTOps, DstVT, StoreMMO);
Result = DAG.getLoad(
DstVT, DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
Chain = Result.getValue(1);
}
return { Result, Chain };
}
/// Horizontal vector math instructions may be slower than normal math with
/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
/// implementation, and likely shuffle complexity of the alternate sequence.
static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
bool IsOptimizingSize = DAG.shouldOptForSize();
bool HasFastHOps = Subtarget.hasFastHorizontalOps();
return !IsSingleSource || IsOptimizingSize || HasFastHOps;
}
/// 64-bit unsigned integer to double expansion.
static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// We can't use this algorithm for strict fp. It produces -0.0 instead of +0.0
// when converting 0 while rounding toward negative infinity. The caller will
// fall back to Expand when i64 is legal, or use FILD in 32-bit mode.
assert(!Op->isStrictFPOpcode() && "Expected non-strict uint_to_fp!");
// This algorithm is not obvious. Here is what we're trying to output:
/*
movq %rax, %xmm0
punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
#ifdef __SSE3__
haddpd %xmm0, %xmm0
#else
pshufd $0x4e, %xmm0, %xmm1
addpd %xmm1, %xmm0
#endif
*/
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, Align(16));
SmallVector<Constant*,2> CV1;
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, Align(16));
// Load the 64-bit value into an XMM register.
SDValue XR1 =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0));
SDValue CLod0 = DAG.getLoad(
MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16));
SDValue Unpck1 =
getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);
SDValue CLod1 = DAG.getLoad(
MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16));
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
if (Subtarget.hasSSE3() &&
shouldUseHorizontalOp(true, DAG, Subtarget)) {
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0, dl));
return Result;
}
/// 32-bit unsigned integer to float expansion.
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
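// 0x4330000000000000 is 2^52; OR'ing the zero-extended 32-bit value into its
// low mantissa bits yields the exact double 2^52 + x, and subtracting the
// bias afterwards recovers x as a double.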
// Load the 32-bit value into an XMM register.
SDValue Load =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
// Or the load with the bias.
SDValue Or = DAG.getNode(
ISD::OR, dl, MVT::v2i64,
DAG.getBitcast(MVT::v2i64, Load),
DAG.getBitcast(MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
Or =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
if (Op.getNode()->isStrictFPOpcode()) {
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Chain = Op.getOperand(0);
SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
{Chain, Or, Bias});
if (Op.getValueType() == Sub.getValueType())
return Sub;
// Handle final rounding.
std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
}
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType());
}
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
if (Op.getSimpleValueType() != MVT::v2f64)
return SDValue();
bool IsStrict = Op->isStrictFPOpcode();
SDValue N0 = Op.getOperand(IsStrict ? 1 : 0);
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
if (Subtarget.hasAVX512()) {
if (!Subtarget.hasVLX()) {
// Let generic type legalization widen this.
if (!IsStrict)
return SDValue();
// Otherwise pad the integer input with 0s and widen the operation.
N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
DAG.getConstant(0, DL, MVT::v2i32));
SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other},
{Op.getOperand(0), N0});
SDValue Chain = Res.getValue(1);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2f64, Res,
DAG.getIntPtrConstant(0, DL));
return DAG.getMergeValues({Res, Chain}, DL);
}
// Legalize to v4i32 type.
N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
DAG.getUNDEF(MVT::v2i32));
if (IsStrict)
return DAG.getNode(X86ISD::STRICT_CVTUI2P, DL, {MVT::v2f64, MVT::Other},
{Op.getOperand(0), N0});
return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
}
// Zero extend to v2i64, OR with the floating point representation of 2^52.
// This gives us the floating point equivalent of 2^52 + the i32 integer
// since double has 52 bits of mantissa. Then subtract 2^52 in floating
// point leaving just our i32 integers in double format.
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0);
SDValue VBias =
DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
if (IsStrict)
return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v2f64, MVT::Other},
{Op.getOperand(0), Or, VBias});
return DAG.getNode(ISD::FSUB, DL, MVT::v2f64, Or, VBias);
}
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
bool IsStrict = Op->isStrictFPOpcode();
SDValue V = Op->getOperand(IsStrict ? 1 : 0);
MVT VecIntVT = V.getSimpleValueType();
assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
"Unsupported custom type");
if (Subtarget.hasAVX512()) {
// With AVX512, but not VLX we need to widen to get a 512-bit result type.
assert(!Subtarget.hasVLX() && "Unexpected features");
MVT VT = Op->getSimpleValueType(0);
// v8i32->v8f64 is legal with AVX512 so just return it.
if (VT == MVT::v8f64)
return Op;
assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) &&
"Unexpected VT!");
MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
// Need to concat with zero vector for strict fp to avoid spurious
// exceptions.
SDValue Tmp =
IsStrict ? DAG.getConstant(0, DL, WideIntVT) : DAG.getUNDEF(WideIntVT);
V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideIntVT, Tmp, V,
DAG.getIntPtrConstant(0, DL));
SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {WideVT, MVT::Other},
{Op->getOperand(0), V});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(ISD::UINT_TO_FP, DL, WideVT, V);
}
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, DL);
return Res;
}
if (Subtarget.hasAVX() && VecIntVT == MVT::v4i32 &&
Op->getSimpleValueType(0) == MVT::v4f64) {
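// Same 2^52 bias trick as the v2i32 case: zero extend to v4i64, OR in a
// broadcast-loaded bias, then subtract the bias to get the values as doubles.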
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i64, V);
Constant *Bias = ConstantFP::get(
*DAG.getContext(),
APFloat(APFloat::IEEEdouble(), APInt(64, 0x4330000000000000ULL)));
auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, Align(8));
SDVTList Tys = DAG.getVTList(MVT::v4f64, MVT::Other);
SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
SDValue VBias = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(8),
MachineMemOperand::MOLoad);
SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn,
DAG.getBitcast(MVT::v4i64, VBias));
Or = DAG.getBitcast(MVT::v4f64, Or);
if (IsStrict)
return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v4f64, MVT::Other},
{Op.getOperand(0), Or, VBias});
return DAG.getNode(ISD::FSUB, DL, MVT::v4f64, Or, VBias);
}
// The algorithm is the following:
// #ifdef __SSE4_1__
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
// #else
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
// uint4 hi = (v >> 16) | (uint4) 0x53000000;
// #endif
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
// return (float4) lo + fhi;
bool Is128 = VecIntVT == MVT::v4i32;
MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
// If we convert to something else than the supported type, e.g., to v4f64,
// abort early.
if (VecFloatVT != Op->getSimpleValueType(0))
return SDValue();
// In the #ifdef/#else code, we have in common:
// - The vector of constants:
// -- 0x4b000000
// -- 0x53000000
// - A shift:
// -- v >> 16
// Create the splat vector for 0x4b000000.
SDValue VecCstLow = DAG.getConstant(0x4b000000, DL, VecIntVT);
// Create the splat vector for 0x53000000.
SDValue VecCstHigh = DAG.getConstant(0x53000000, DL, VecIntVT);
// Create the right shift.
SDValue VecCstShift = DAG.getConstant(16, DL, VecIntVT);
SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift);
SDValue Low, High;
if (Subtarget.hasSSE41()) {
MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow);
SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift);
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
} else {
SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask);
Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow);
// uint4 hi = (v >> 16) | (uint4) 0x53000000;
High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh);
}
// Create the vector constant for (0x1.0p39f + 0x1.0p23f).
SDValue VecCstFSub = DAG.getConstantFP(
APFloat(APFloat::IEEEsingle(), APInt(32, 0x53000080)), DL, VecFloatVT);
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
// NOTE: By using fsub of a positive constant instead of fadd of a negative
// constant, we avoid reassociation in MachineCombiner when unsafe-fp-math is
// enabled. See PR24512.
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
// TODO: Are there any fast-math-flags to propagate here?
// (float4) lo;
SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low);
// return (float4) lo + fhi;
if (IsStrict) {
SDValue FHigh = DAG.getNode(ISD::STRICT_FSUB, DL, {VecFloatVT, MVT::Other},
{Op.getOperand(0), HighBitcast, VecCstFSub});
return DAG.getNode(ISD::STRICT_FADD, DL, {VecFloatVT, MVT::Other},
{FHigh.getValue(1), LowBitcast, FHigh});
}
SDValue FHigh =
DAG.getNode(ISD::FSUB, DL, VecFloatVT, HighBitcast, VecCstFSub);
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDValue N0 = Op.getOperand(OpNo);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
switch (SrcVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");
case MVT::v2i32:
return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl);
case MVT::v4i32:
case MVT::v8i32:
return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget);
case MVT::v2i64:
case MVT::v4i64:
return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget);
}
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
MVT SrcVT = Src.getSimpleValueType();
MVT DstVT = Op->getSimpleValueType(0);
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
// Bail out when we don't have native conversion instructions.
if (DstVT == MVT::f128)
return SDValue();
if (isSoftFP16(DstVT))
return promoteXINT_TO_FP(Op, DAG);
else if (isLegalConversion(SrcVT, false, Subtarget))
return Op;
if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
return LowerWin64_INT128_TO_FP(Op, DAG);
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
(SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
// Conversions from unsigned i32 to f32/f64 are legal,
// using VCVTUSI2SS/SD. Same for i64 in 64-bit mode.
return Op;
}
// Promote i32 to i64 and use a signed conversion on 64-bit targets.
if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
{Chain, Src});
return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
}
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
return V;
// The transform for i64->f64 isn't correct for 0 when rounding to negative
// infinity. It produces -0.0, so disable under strictfp.
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && Subtarget.hasSSE2() &&
!IsStrict)
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
// The transform for i32->f64/f32 isn't correct for 0 when rounding to
// negative infinity, so disable it under strictfp and use FILD instead.
if (SrcVT == MVT::i32 && Subtarget.hasSSE2() && DstVT != MVT::f80 &&
!IsStrict)
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 &&
(DstVT == MVT::f32 || DstVT == MVT::f64))
return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64, 8);
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
Align SlotAlign(8);
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot =
DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
SDValue Store1 = DAG.getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MPI.getWithOffset(4), SlotAlign);
std::pair<SDValue, SDValue> Tmp =
BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, SlotAlign, DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
return Tmp.first;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue ValueToStore = Src;
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
}
SDValue Store =
DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, SlotAlign);
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. We must be careful to do the computation in x87 extended
// precision, not in SSE.
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot };
SDValue Fild =
DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MPI,
SlotAlign, MachineMemOperand::MOLoad);
Chain = Fild.getValue(1);
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64 bit pair (FF, 0) in the constant pool, with FF in the hi bits.
APInt FF(64, 0x5F80000000000000ULL);
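// 0x5F800000 is 2^64 as an IEEE single; adding it undoes the wraparound the
// signed FILD introduced for inputs with the sign bit set.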
SDValue FudgePtr = DAG.getConstantPool(
ConstantInt::get(*DAG.getContext(), FF), PtrVT);
Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign();
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0, dl);
SDValue Four = DAG.getIntPtrConstant(4, dl);
SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero);
FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);
// Load the value out, extending it from f32 to f80.
SDValue Fudge = DAG.getExtLoad(
ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
CPAlignment);
Chain = Fudge.getValue(1);
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
if (IsStrict) {
unsigned Opc = ISD::STRICT_FADD;
// Windows needs the precision control changed to 80bits around this add.
if (Subtarget.isOSWindows() && DstVT == MVT::f32)
Opc = X86ISD::STRICT_FP80_ADD;
SDValue Add =
DAG.getNode(Opc, dl, {MVT::f80, MVT::Other}, {Chain, Fild, Fudge});
// STRICT_FP_ROUND can't handle equal types.
if (DstVT == MVT::f80)
return Add;
return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
{Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
}
unsigned Opc = ISD::FADD;
// Windows needs the precision control changed to 80bits around this add.
if (Subtarget.isOSWindows() && DstVT == MVT::f32)
Opc = X86ISD::FP80_ADD;
SDValue Add = DAG.getNode(Opc, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
}
// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
// is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
// just return an SDValue().
// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
// to i16, i32 or i64, and we lower it to a legal sequence and return the
// result.
SDValue
X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool IsSigned, SDValue &Chain) const {
bool IsStrict = Op->isStrictFPOpcode();
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
SDValue Value = Op.getOperand(IsStrict ? 1 : 0);
EVT TheVT = Value.getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
// f16 must be promoted before using the lowering in this routine.
// fp128 does not use this lowering.
return SDValue();
}
// If using FIST to compute an unsigned i64, we'll need some fixup
// to handle values above the maximum signed i64. A FIST is always
// used for the 32-bit subtarget, but also for f80 on a 64-bit target.
bool UnsignedFixup = !IsSigned && DstTy == MVT::i64;
// FIXME: This does not generate an invalid exception if the input does not
// fit in i32. PR44019
if (!IsSigned && DstTy != MVT::i64) {
// Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
// The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");
// We lower FP->int64 into FISTP64 followed by a load from a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getStoreSize();
int SSFI =
MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
if (UnsignedFixup) {
//
// Conversion to unsigned i64 is implemented with a select,
// depending on whether the source value fits in the range
// of a signed i64. Let Thresh be the FP equivalent of
// 0x8000000000000000ULL.
//
// Adjust = (Value >= Thresh) ? 0x80000000 : 0;
// FltOfs = (Value >= Thresh) ? Thresh : 0.0;
// FistSrc = (Value - FltOfs);
// Fist-to-mem64 FistSrc
// Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
// to XOR'ing the high 32 bits with Adjust.
//
// Being a power of 2, Thresh is exactly representable in all FP formats.
// For X87 we'd like to use the smallest FP type for this constant, but
// for DAG type consistency we have to match the FP operand type.
APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
bool LosesInfo = false;
if (TheVT == MVT::f64)
// The rounding mode is irrelevant as the conversion should be exact.
Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
&LosesInfo);
else if (TheVT == MVT::f80)
Status = Thresh.convert(APFloat::x87DoubleExtended(),
APFloat::rmNearestTiesToEven, &LosesInfo);
assert(Status == APFloat::opOK && !LosesInfo &&
"FP conversion should have been exact");
SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
EVT ResVT = getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT);
SDValue Cmp;
if (IsStrict) {
Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE, Chain,
/*IsSignaling*/ true);
Chain = Cmp.getValue(1);
} else {
Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE);
}
// Our preferred lowering of
//
// (Value >= Thresh) ? 0x8000000000000000ULL : 0
//
// is
//
// (Value >= Thresh) << 63
//
// but since we can get here after LegalOperations, DAGCombine might do the
// wrong thing if we create a select. So, directly create the preferred
// version.
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Cmp);
SDValue Const63 = DAG.getConstant(63, DL, MVT::i8);
Adjust = DAG.getNode(ISD::SHL, DL, MVT::i64, Zext, Const63);
SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp, ThreshVal,
DAG.getConstantFP(0.0, DL, TheVT));
if (IsStrict) {
Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other},
{ Chain, Value, FltOfs });
Chain = Value.getValue(1);
} else
Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs);
}
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
// FIXME This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Chain, StackSlot };
unsigned FLDSize = TheVT.getStoreSize();
assert(FLDSize <= MemSize && "Stack slot not big enough");
MachineMemOperand *MMO = MF.getMachineMemOperand(
MPI, MachineMemOperand::MOLoad, FLDSize, Align(FLDSize));
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO);
Chain = Value.getValue(1);
}
// Build the FP_TO_INT*_IN_MEM
MachineMemOperand *MMO = MF.getMachineMemOperand(
MPI, MachineMemOperand::MOStore, MemSize, Align(MemSize));
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);
SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI);
Chain = Res.getValue(1);
// If we need an unsigned fixup, XOR the result with adjust.
if (UnsignedFixup)
Res = DAG.getNode(ISD::XOR, DL, MVT::i64, Res, Adjust);
return Res;
}
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
unsigned Opc = Op.getOpcode();
assert(VT.isVector() && InVT.isVector() && "Expected vector type");
assert((Opc == ISD::ANY_EXTEND || Opc == ISD::ZERO_EXTEND) &&
"Unexpected extension opcode");
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Expected same number of elements");
assert((VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::i64) &&
"Unexpected element type");
assert((InVT.getVectorElementType() == MVT::i8 ||
InVT.getVectorElementType() == MVT::i16 ||
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
unsigned ExtendInVecOpc = DAG.getOpcode_EXTEND_VECTOR_INREG(Opc);
if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
assert(InVT == MVT::v32i8 && "Unexpected VT!");
return splitVectorIntUnary(Op, DAG);
}
if (Subtarget.hasInt256())
return Op;
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
// Use vpmovzwd for 4 lower elements v8i16 -> v4i32.
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
// Concat upper and lower parts.
//
// v4i32 -> v4i64
// Use vpmovzdq for 4 lower elements v4i32 -> v2i64.
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
MVT HalfVT = VT.getHalfNumVectorElementsVT();
SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In);
// Short-circuit if we can determine that each 128-bit half is the same value.
// Otherwise, this is difficult to match and optimize.
if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In))
if (hasIdenticalHalvesShuffleMask(Shuf->getMask()))
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo);
SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Opc == ISD::ZERO_EXTEND;
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
OpHi = DAG.getBitcast(HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
// Helper to split and extend a v16i1 mask to v16i8 or v16i16.
static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In,
const SDLoc &dl, SelectionDAG &DAG) {
assert((VT == MVT::v16i8 || VT == MVT::v16i16) && "Unexpected VT.");
SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In,
DAG.getIntPtrConstant(0, dl));
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In,
DAG.getIntPtrConstant(8, dl));
Lo = DAG.getNode(ExtOpc, dl, MVT::v8i16, Lo);
Hi = DAG.getNode(ExtOpc, dl, MVT::v8i16, Hi);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i16, Lo, Hi);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!");
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
// For all vectors but vXi8 we can just emit a sign_extend and a shift. This
// avoids a constant pool load.
if (VT.getVectorElementType() != MVT::i8) {
SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);
return DAG.getNode(ISD::SRL, DL, VT, Extend,
DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
}
// Extend VT if BWI is not supported.
MVT ExtVT = VT;
if (!Subtarget.hasBWI()) {
// If v16i32 is to be avoided, we'll need to split and concatenate.
if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG);
ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
}
// Widen to 512-bits if VLX is not supported.
MVT WideVT = ExtVT;
if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) {
NumElts *= 512 / ExtVT.getSizeInBits();
InVT = MVT::getVectorVT(MVT::i1, NumElts);
In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT),
In, DAG.getIntPtrConstant(0, DL));
WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(),
NumElts);
}
SDValue One = DAG.getConstant(1, DL, WideVT);
SDValue Zero = DAG.getConstant(0, DL, WideVT);
SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);
// Truncate if we had to extend above.
if (VT != ExtVT) {
WideVT = MVT::getVectorVT(MVT::i8, NumElts);
SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal);
}
// Extract back to 128/256-bit if we widened.
if (WideVT != VT)
SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal,
DAG.getIntPtrConstant(0, DL));
return SelectedVal;
}
static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
if (SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_Mask(Op, Subtarget, DAG);
assert(Subtarget.hasAVX() && "Expected AVX support");
return LowerAVXExtend(Op, DAG, Subtarget);
}
/// Helper to recursively truncate vector elements in half with PACKSS/PACKUS.
/// It makes use of the fact that vectors with enough leading sign/zero bits
/// prevent the PACKSS/PACKUS from saturating the results.
/// AVX2 (Int256) sub-targets require extra shuffling as the PACK*S operates
/// within each 128-bit lane.
static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) &&
"Unexpected PACK opcode");
assert(DstVT.isVector() && "VT not a vector?");
// Requires SSE2 for PACKSS (SSE41 PACKUSDW is handled below).
if (!Subtarget.hasSSE2())
return SDValue();
EVT SrcVT = In.getValueType();
// No truncation required, we might get here due to recursive calls.
if (SrcVT == DstVT)
return In;
// We only support vector truncation to 64bits or greater from a
// 128bits or greater source.
unsigned DstSizeInBits = DstVT.getSizeInBits();
unsigned SrcSizeInBits = SrcVT.getSizeInBits();
if ((DstSizeInBits % 64) != 0 || (SrcSizeInBits % 128) != 0)
return SDValue();
unsigned NumElems = SrcVT.getVectorNumElements();
if (!isPowerOf2_32(NumElems))
return SDValue();
LLVMContext &Ctx = *DAG.getContext();
assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
assert(SrcSizeInBits > DstSizeInBits && "Illegal truncation");
EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
// Pack to the largest type possible:
// vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB.
EVT InVT = MVT::i16, OutVT = MVT::i8;
if (SrcVT.getScalarSizeInBits() > 16 &&
(Opcode == X86ISD::PACKSS || Subtarget.hasSSE41())) {
InVT = MVT::i32;
OutVT = MVT::i16;
}
// 128bit -> 64bit truncate - PACK 128-bit src in the lower subvector.
if (SrcVT.is128BitVector()) {
InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits());
OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits());
In = DAG.getBitcast(InVT, In);
SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT));
Res = extractSubVector(Res, 0, DAG, DL, 64);
return DAG.getBitcast(DstVT, Res);
}
// Split lower/upper subvectors.
SDValue Lo, Hi;
std::tie(Lo, Hi) = splitVector(In, DAG, DL);
unsigned SubSizeInBits = SrcSizeInBits / 2;
InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
// 256bit -> 128bit truncate - PACK lower/upper 128-bit subvectors.
if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
Lo = DAG.getBitcast(InVT, Lo);
Hi = DAG.getBitcast(InVT, Hi);
SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi);
return DAG.getBitcast(DstVT, Res);
}
// AVX2: 512bit -> 256bit truncate - PACK lower/upper 256-bit subvectors.
// AVX2: 512bit -> 128bit truncate - PACK(PACK, PACK).
if (SrcVT.is512BitVector() && Subtarget.hasInt256()) {
Lo = DAG.getBitcast(InVT, Lo);
Hi = DAG.getBitcast(InVT, Hi);
SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi);
// 256-bit PACK(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
// so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
// Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
SmallVector<int, 64> Mask;
int Scale = 64 / OutVT.getScalarSizeInBits();
narrowShuffleMaskElts(Scale, { 0, 2, 1, 3 }, Mask);
Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
if (DstVT.is256BitVector())
return DAG.getBitcast(DstVT, Res);
// If this is a 512-bit -> 128-bit truncate, pack down another stage.
EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
Res = DAG.getBitcast(PackedVT, Res);
return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}
// Recursively pack lower/upper subvectors, concat result and pack again.
assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater");
EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
Lo = truncateVectorWithPACK(Opcode, PackedVT, Lo, DL, DAG, Subtarget);
Hi = truncateVectorWithPACK(Opcode, PackedVT, Hi, DL, DAG, Subtarget);
PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type.");
// Shift LSB to MSB and use VPMOVB/W2M or TESTD/Q.
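// Sketch of the trick (illustrative): only bit 0 of each lane survives an
// i1 truncate, so e.g. an i8 lane 0bxxxxxxx1 shifted left by 7 becomes
// 0b1xxxxxxx, and a signed compare (0 > lane) then reads back that bit.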
unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
if (InVT.getScalarSizeInBits() <= 16) {
if (Subtarget.hasBWI()) {
// legal, will go to VPMOVB2M, VPMOVW2M
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
// We need to shift to get the lsb into sign position.
// Shifting packed bytes is not supported natively, so bitcast to words.
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
In = DAG.getNode(ISD::SHL, DL, ExtVT,
DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
In = DAG.getBitcast(InVT, In);
}
return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
In, ISD::SETGT);
}
// Use TESTD/Q, extending the vector to packed dword/qword.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
"Unexpected vector type.");
unsigned NumElts = InVT.getVectorNumElements();
assert((NumElts == 8 || NumElts == 16) && "Unexpected number of elements");
// We need to change to a wider element type that we have support for.
// For 8 element vectors this is easy, we either extend to v8i32 or v8i64.
// For 16 element vectors we extend to v16i32 unless we are explicitly
// trying to avoid 512-bit vectors. If we are avoiding 512-bit vectors
// we need to split into two 8 element vectors which we can extend to v8i32,
// truncate and concat the results. There's an additional complication if
// the original type is v16i8. In that case we can't split the v16i8
// directly, so we need to shuffle high elements to low and use
// sign_extend_vector_inreg.
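// For example (illustrative), for v16i8 the code below extends lanes 0..7
// in place with sign_extend_vector_inreg to v8i32, shuffles lanes 8..15
// down to the low half, extends those the same way, and truncates/concats.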
if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) {
SDValue Lo, Hi;
if (InVT == MVT::v16i8) {
Lo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, In);
Hi = DAG.getVectorShuffle(
InVT, DL, In, In,
{8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
Hi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, Hi);
} else {
assert(InVT == MVT::v16i16 && "Unexpected VT!");
Lo = extract128BitVector(In, 0, DAG, DL);
Hi = extract128BitVector(In, 8, DAG, DL);
}
// We're split now, just emit two truncates and a concat. The two
// truncates will trigger legalization to come back to this function.
Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo);
Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
// We either have 8 elements or we're allowed to use 512-bit vectors.
// If we have VLX, we want to use the narrowest vector that can get the
// job done so we use vXi32.
MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts);
MVT ExtVT = MVT::getVectorVT(EltVT, NumElts);
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
ShiftInx = InVT.getScalarSizeInBits() - 1;
}
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
// We need to shift to get the lsb into sign position.
In = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
}
// If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
if (Subtarget.hasDQI())
return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
unsigned InNumEltBits = InVT.getScalarSizeInBits();
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
// If we're called by the type legalizer, handle a few cases.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(InVT)) {
if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
VT.is128BitVector()) {
assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&
"Unexpected subtarget!");
// The default behavior is to truncate one step, concatenate, and then
// truncate the remainder. We'd rather produce two 64-bit results and
// concatenate those.
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
Lo = DAG.getNode(ISD::TRUNCATE, DL, LoVT, Lo);
Hi = DAG.getNode(ISD::TRUNCATE, DL, HiVT, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
// Otherwise let default legalization handle it.
return SDValue();
}
if (VT.getVectorElementType() == MVT::i1)
return LowerTruncateVecI1(Op, DAG, Subtarget);
// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (Subtarget.hasAVX512()) {
if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) {
assert(VT == MVT::v32i8 && "Unexpected VT!");
return splitVectorIntUnary(Op, DAG);
}
// Word to byte is only legal with BWI. Otherwise we have to promote to v16i32
// and then truncate that. But we should only do that if we haven't been
// asked to avoid 512-bit vectors. The actual promotion to v16i32 will be
// handled by isel patterns.
if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
Subtarget.canExtendTo512DQ())
return Op;
}
unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16);
unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
// Truncate with PACKUS if we are truncating a vector with leading zero bits
// that extend all the way to the packed/truncated value.
// Pre-SSE41 we can only use PACKUSWB.
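// E.g. (illustrative) v8i32 -> v8i16 with SSE41's PACKUSDW is exact when
// the top 16 bits of every i32 lane are known zero, i.e. when
// (32 - 16) <= Known.countMinLeadingZeros() in the check below.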
KnownBits Known = DAG.computeKnownBits(In);
if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
if (SDValue V =
truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget))
return V;
// Truncate with PACKSS if we are truncating a vector with sign-bits that
// extend all the way to the packed/truncated value.
if ((InNumEltBits - NumPackedSignBits) < DAG.ComputeNumSignBits(In))
if (SDValue V =
truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget))
return V;
// Handle truncation of V256 to V128 using shuffles.
assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
In = DAG.getBitcast(MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
DAG.getIntPtrConstant(0, DL));
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(2, DL));
static const int ShufMask[] = {0, 2, 4, 6};
return DAG.getVectorShuffle(VT, DL, DAG.getBitcast(MVT::v4i32, OpLo),
DAG.getBitcast(MVT::v4i32, OpHi), ShufMask);
}
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
// The PSHUFB mask:
static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1,
16, 17, 20, 21, 24, 25, 28, 29,
-1, -1, -1, -1, -1, -1, -1, -1 };
In = DAG.getBitcast(MVT::v32i8, In);
In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
static const int ShufMask2[] = {0, 2, -1, -1};
In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
return DAG.getBitcast(MVT::v8i16, In);
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(4, DL));
// The PSHUFB mask:
static const int ShufMask1[] = {0, 2, 4, 6, -1, -1, -1, -1};
OpLo = DAG.getBitcast(MVT::v8i16, OpLo);
OpHi = DAG.getBitcast(MVT::v8i16, OpHi);
OpLo = DAG.getVectorShuffle(MVT::v8i16, DL, OpLo, OpLo, ShufMask1);
OpHi = DAG.getVectorShuffle(MVT::v8i16, DL, OpHi, OpHi, ShufMask1);
OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
// The MOVLHPS Mask:
static const int ShufMask2[] = {0, 1, 4, 5};
SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
return DAG.getBitcast(MVT::v8i16, res);
}
if (VT == MVT::v16i8 && InVT == MVT::v16i16) {
// Use an AND to zero the upper bits for PACKUS.
In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT));
SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
DAG.getIntPtrConstant(0, DL));
SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
DAG.getIntPtrConstant(8, DL));
return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
}
llvm_unreachable("All 256->128 cases should have been handled above!");
}
// We can leverage the specific way the "cvttps2dq/cvttpd2dq" instruction
// behaves on out-of-range inputs to generate optimized conversions.
static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT SrcVT = Src.getSimpleValueType();
unsigned DstBits = VT.getScalarSizeInBits();
assert(DstBits == 32 && "expandFP_TO_UINT_SSE - only vXi32 supported");
// Calculate the converted result for values in the range 0 to
// 2^31-1 ("Small") and from 2^31 to 2^32-1 ("Big").
SDValue Small = DAG.getNode(X86ISD::CVTTP2SI, dl, VT, Src);
SDValue Big =
DAG.getNode(X86ISD::CVTTP2SI, dl, VT,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src,
DAG.getConstantFP(2147483648.0f, dl, SrcVT)));
// The "CVTTP2SI" instruction conveniently sets the sign bit if
// and only if the value was out of range. So we can use that
// as our indicator that we should use "Big" instead of "Small".
//
// Use "Small" if "IsOverflown" has all bits cleared
// and "0x80000000 | Big" if all bits in "IsOverflown" are set.
// AVX1 can't use the signsplat masking for 256-bit vectors - we have to
// use the slightly slower blendv select instead.
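// Worked v4f32 -> v4i32 example (illustrative): for a lane holding 3e9,
// Small = cvt(3e9) = 0x80000000 (the indefinite value), Big =
// cvt(3e9 - 2^31) = 0x32D05E00, IsOverflown = Small >>s 31 = all-ones, so
// the result is 0x80000000 | 0x32D05E00 = 0xB2D05E00 = 3000000000.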
if (VT == MVT::v8i32 && !Subtarget.hasAVX2()) {
SDValue Overflow = DAG.getNode(ISD::OR, dl, VT, Small, Big);
return DAG.getNode(X86ISD::BLENDV, dl, VT, Small, Overflow, Small);
}
SDValue IsOverflown =
DAG.getNode(X86ISD::VSRAI, dl, VT, Small,
DAG.getTargetConstant(DstBits - 1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VT, Small,
DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown));
}
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
MVT VT = Op->getSimpleValueType(0);
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
SDValue Res;
if (isSoftFP16(SrcVT)) {
MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
if (IsStrict)
return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
{Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
{NVT, MVT::Other}, {Chain, Src})});
return DAG.getNode(Op.getOpcode(), dl, VT,
DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src));
} else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) {
return Op;
}
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
MVT TruncVT = MVT::v4i1;
unsigned Opc;
if (IsStrict)
Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
else
Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
assert(Subtarget.useAVX512Regs() && "Unexpected features!");
// Widen to 512-bits.
ResVT = MVT::v8i32;
TruncVT = MVT::v8i1;
Opc = Op.getOpcode();
// Need to concat with zero vector for strict fp to avoid spurious
// exceptions.
// TODO: Should we just do this for non-strict as well?
SDValue Tmp = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v8f64)
: DAG.getUNDEF(MVT::v8f64);
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
}
if (IsStrict) {
Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Opc, dl, ResVT, Src);
}
Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
DAG.getIntPtrConstant(0, dl));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
if (Subtarget.hasFP16() && SrcVT.getVectorElementType() == MVT::f16) {
if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
return Op;
MVT ResVT = VT;
MVT EleVT = VT.getVectorElementType();
if (EleVT != MVT::i64)
ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
if (SrcVT != MVT::v8f16) {
SDValue Tmp =
IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
Ops[0] = Src;
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
}
if (IsStrict) {
Res = DAG.getNode(IsSigned ? X86ISD::STRICT_CVTTP2SI
: X86ISD::STRICT_CVTTP2UI,
dl, {ResVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl,
ResVT, Src);
}
// TODO: Need to add exception check code for strict FP.
if (EleVT.getSizeInBits() < 16) {
ResVT = MVT::getVectorVT(EleVT, 8);
Res = DAG.getNode(ISD::TRUNCATE, dl, ResVT, Res);
}
if (ResVT != VT)
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getIntPtrConstant(0, dl));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
if (IsStrict) {
Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
: ISD::STRICT_FP_TO_UINT,
dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
MVT::v8i32, Src);
}
// TODO: Need to add exception check code for strict FP.
Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
// v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.
if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
assert(!IsSigned && "Expected unsigned conversion!");
assert(Subtarget.useAVX512Regs() && "Requires avx512f");
return Op;
}
// Widen vXi32 fp_to_uint with avx512f to 512-bit source.
if ((VT == MVT::v4i32 || VT == MVT::v8i32) &&
(SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32) &&
Subtarget.useAVX512Regs()) {
assert(!IsSigned && "Expected unsigned conversion!");
assert(!Subtarget.hasVLX() && "Unexpected features!");
MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
// Need to concat with zero vector for strict fp to avoid spurious
// exceptions.
// TODO: Should we just do this for non-strict as well?
SDValue Tmp =
IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
if (IsStrict) {
Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
{Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
}
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getIntPtrConstant(0, dl));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
// Widen vXi64 fp_to_uint/fp_to_sint with avx512dq to 512-bit source.
if ((VT == MVT::v2i64 || VT == MVT::v4i64) &&
(SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32) &&
Subtarget.useAVX512Regs() && Subtarget.hasDQI()) {
assert(!Subtarget.hasVLX() && "Unexpected features!");
MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
// Need to concat with zero vector for strict fp to avoid spurious
// exceptions.
// TODO: Should we just do this for non-strict as well?
SDValue Tmp =
IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
if (IsStrict) {
Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
{Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
}
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getIntPtrConstant(0, dl));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
if (!Subtarget.hasVLX()) {
// Non-strict nodes without VLX can be widened to v4f32->v4i64 by the type
// legalizer and then widened again by vector op legalization.
if (!IsStrict)
return SDValue();
SDValue Zero = DAG.getConstantFP(0.0, dl, MVT::v2f32);
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
{Src, Zero, Zero, Zero});
Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
{Chain, Tmp});
SDValue Chain = Tmp.getValue(1);
Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
DAG.getIntPtrConstant(0, dl));
return DAG.getMergeValues({Tmp, Chain}, dl);
}
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL");
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
if (IsStrict) {
unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI
: X86ISD::STRICT_CVTTP2UI;
return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});
}
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
return DAG.getNode(Opc, dl, VT, Tmp);
}
// Generate optimized instructions for pre-AVX512 unsigned conversions from
// vXf32 to vXi32.
if ((VT == MVT::v4i32 && SrcVT == MVT::v4f32) ||
(VT == MVT::v4i32 && SrcVT == MVT::v4f64) ||
(VT == MVT::v8i32 && SrcVT == MVT::v8f32)) {
assert(!IsSigned && "Expected unsigned conversion!");
return expandFP_TO_UINT_SSE(VT, Src, dl, DAG, Subtarget);
}
return SDValue();
}
assert(!VT.isVector());
bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT);
if (!IsSigned && UseSSEReg) {
// Conversions from f32/f64 with AVX512 should be legal.
if (Subtarget.hasAVX512())
return Op;
// We can leverage the specific way the "cvttss2si/cvttsd2si" instruction
// behaves on out-of-range inputs to generate optimized conversions.
if (!IsStrict && ((VT == MVT::i32 && !Subtarget.is64Bit()) ||
(VT == MVT::i64 && Subtarget.is64Bit()))) {
unsigned DstBits = VT.getScalarSizeInBits();
APInt UIntLimit = APInt::getSignMask(DstBits);
SDValue FloatOffset = DAG.getNode(ISD::UINT_TO_FP, dl, SrcVT,
DAG.getConstant(UIntLimit, dl, VT));
MVT SrcVecVT = MVT::getVectorVT(SrcVT, 128 / SrcVT.getScalarSizeInBits());
// Calculate the converted result for values in the range:
// (i32) 0 to 2^31-1 ("Small") and from 2^31 to 2^32-1 ("Big").
// (i64) 0 to 2^63-1 ("Small") and from 2^63 to 2^64-1 ("Big").
SDValue Small =
DAG.getNode(X86ISD::CVTTS2SI, dl, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, SrcVecVT, Src));
SDValue Big = DAG.getNode(
X86ISD::CVTTS2SI, dl, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, SrcVecVT,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FloatOffset)));
// The "CVTTS2SI" instruction conveniently sets the sign bit if
// and only if the value was out of range. So we can use that
// as our indicator that we should use "Big" instead of "Small".
//
// Use "Small" if "IsOverflown" has all bits cleared
// and "0x80000000 | Big" if all bits in "IsOverflown" are set.
SDValue IsOverflown = DAG.getNode(
ISD::SRA, dl, VT, Small, DAG.getConstant(DstBits - 1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VT, Small,
DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown));
}
// Use default expansion for i64.
if (VT == MVT::i64)
return SDValue();
assert(VT == MVT::i32 && "Unexpected VT!");
// Promote i32 to i64 and use a signed operation on 64-bit targets.
// FIXME: This does not generate an invalid exception if the input does not
// fit in i32. PR44019
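// E.g. (illustrative) (uint32_t)4.0e9f: the value exceeds INT32_MAX, but a
// signed 64-bit cvttss2si yields 4000000000, and truncating to i32 keeps
// the same 32 low bits (0xEE6B2800).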
if (Subtarget.is64Bit()) {
if (IsStrict) {
Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i64, MVT::Other},
{Chain, Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
// Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can
// use fisttp which will be handled later.
if (!Subtarget.hasSSE3())
return SDValue();
}
// Promote i16 to i32 if we can use a SSE operation or the type is f128.
// FIXME: This does not generate an invalid exception if the input does not
// fit in i16. PR44019
if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
if (IsStrict) {
Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other},
{Chain, Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
// If this is a FP_TO_SINT using SSEReg we're done.
if (UseSSEReg && IsSigned)
return Op;
// fp128 needs to use a libcall.
if (SrcVT == MVT::f128) {
RTLIB::Libcall LC;
if (IsSigned)
LC = RTLIB::getFPTOSINT(SrcVT, VT);
else
LC = RTLIB::getFPTOUINT(SrcVT, VT);
MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
SDLoc(Op), Chain);
if (IsStrict)
return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
return Tmp.first;
}
// Fall back to X87.
if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
if (IsStrict)
return DAG.getMergeValues({V, Chain}, dl);
return V;
}
llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (SrcVT == MVT::f16)
return SDValue();
// If the source is in an SSE register, the node is Legal.
if (isScalarFPTypeInSSEReg(SrcVT))
return Op;
return LRINT_LLRINTHelper(Op.getNode(), DAG);
}
SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
SelectionDAG &DAG) const {
EVT DstVT = N->getValueType(0);
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) {
// f16 must be promoted before using the lowering in this routine.
// fp128 does not use this lowering.
return SDValue();
}
SDLoc DL(N);
SDValue Chain = DAG.getEntryNode();
bool UseSSE = isScalarFPTypeInSSEReg(SrcVT);
// If we're converting from SSE, the stack slot needs to hold both types.
// Otherwise it only needs to hold the DstVT.
EVT OtherVT = UseSSE ? SrcVT : DstVT;
SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
if (UseSSE) {
assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Chain, StackPtr };
Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
/*Align*/ std::nullopt,
MachineMemOperand::MOLoad);
Chain = Src.getValue(1);
}
SDValue StoreOps[] = { Chain, Src, StackPtr };
Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other),
StoreOps, DstVT, MPI, /*Align*/ std::nullopt,
MachineMemOperand::MOStore);
return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
}
SDValue
X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
// This is based on the TargetLowering::expandFP_TO_INT_SAT implementation,
// but making use of X86 specifics to produce better instruction sequences.
SDNode *Node = Op.getNode();
bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
unsigned FpToIntOpcode = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
SDLoc dl(SDValue(Node, 0));
SDValue Src = Node->getOperand(0);
// There are three types involved here: SrcVT is the source floating point
// type, DstVT is the type of the result, and TmpVT is the result of the
// intermediate FP_TO_*INT operation we'll use (which may be a promotion of
// DstVT).
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
EVT TmpVT = DstVT;
// This code is only for floats and doubles. Fall back to generic code for
// anything else.
if (!isScalarFPTypeInSSEReg(SrcVT) || isSoftFP16(SrcVT))
return SDValue();
EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
unsigned SatWidth = SatVT.getScalarSizeInBits();
unsigned DstWidth = DstVT.getScalarSizeInBits();
unsigned TmpWidth = TmpVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
"Expected saturation width smaller than result width");
// Promote result of FP_TO_*INT to at least 32 bits.
if (TmpWidth < 32) {
TmpVT = MVT::i32;
TmpWidth = 32;
}
// Promote conversions to unsigned 32-bit to 64-bit, because it will allow
// us to use a native signed conversion instead.
if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) {
TmpVT = MVT::i64;
TmpWidth = 64;
}
// If the saturation width is smaller than the size of the temporary result,
// we can always use signed conversion, which is native.
if (SatWidth < TmpWidth)
FpToIntOpcode = ISD::FP_TO_SINT;
// Determine minimum and maximum integer values and their corresponding
// floating-point values.
APInt MinInt, MaxInt;
if (IsSigned) {
MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
} else {
MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
}
APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
APFloat::opStatus MinStatus = MinFloat.convertFromAPInt(
MinInt, IsSigned, APFloat::rmTowardZero);
APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt(
MaxInt, IsSigned, APFloat::rmTowardZero);
bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact)
&& !(MaxStatus & APFloat::opStatus::opInexact);
SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
// If the integer bounds are exactly representable as floats, emit a
// min+max+fptoi sequence. Otherwise use comparisons and selects.
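// Illustrative example: fptosi.sat.i8 from f32 takes the DstVT != TmpVT
// path below - clamp Src to [-128.0, 127.0] with FMAX/FMIN, convert to
// i32, then truncate to i8; a NaN input propagates through both clamps,
// converts to INDVAL (0x80000000), and truncates to 0 as required.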
if (AreExactFloatBounds) {
if (DstVT != TmpVT) {
// Clamp by MinFloat from below. If Src is NaN, propagate NaN.
SDValue MinClamped = DAG.getNode(
X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
// Clamp by MaxFloat from above. If Src is NaN, propagate NaN.
SDValue BothClamped = DAG.getNode(
X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
// NaN will become INDVAL, with the top bit set and the rest zero.
// Truncation will discard the top bit, resulting in zero.
return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
}
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
SDValue MinClamped = DAG.getNode(
X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
// Clamp by MaxFloat from above. NaN cannot occur.
SDValue BothClamped = DAG.getNode(
X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
if (!IsSigned) {
// In the unsigned case we're done, because we mapped NaN to MinFloat,
// which is zero.
return FpToInt;
}
// Otherwise, select zero if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
return DAG.getSelectCC(
dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
// Result of direct conversion, which may be selected away.
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src);
if (DstVT != TmpVT) {
// NaN will become INDVAL, with the top bit set and the rest zero.
// Truncation will discard the top bit, resulting in zero.
FpToInt = DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
}
SDValue Select = FpToInt;
// For signed conversions where we saturate to the same size as the
// result type of the fptoi instructions, INDVAL coincides with integer
// minimum, so we don't need to explicitly check it.
if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) {
// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.
Select = DAG.getSelectCC(
dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT);
}
// If Src OGT MaxFloat, select MaxInt.
Select = DAG.getSelectCC(
dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT);
// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is already zero. The promoted case was already handled above.
if (!IsSigned || DstVT != TmpVT) {
return Select;
}
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
return DAG.getSelectCC(
dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT SVT = In.getSimpleValueType();
// Let f16->f80 get lowered to a libcall, except for darwin, where we should
// lower it to an fp_extend via f32 (as only f16<>f32 libcalls are available)
if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80 &&
!Subtarget.getTargetTriple().isOSDarwin()))
return SDValue();
if (SVT == MVT::f16) {
if (Subtarget.hasFP16())
return Op;
if (VT != MVT::f32) {
if (IsStrict)
return DAG.getNode(
ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other},
{Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, DL,
{MVT::f32, MVT::Other}, {Chain, In})});
return DAG.getNode(ISD::FP_EXTEND, DL, VT,
DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, In));
}
if (!Subtarget.hasF16C()) {
if (!Subtarget.getTargetTriple().isOSDarwin())
return SDValue();
assert(VT == MVT::f32 && SVT == MVT::f16 && "unexpected extend libcall");
// Need a libcall, but the ABI for f16 is soft-float on MacOS.
TargetLowering::CallLoweringInfo CLI(DAG);
Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
In = DAG.getBitcast(MVT::i16, In);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = In;
Entry.Ty = EVT(MVT::i16).getTypeForEVT(*DAG.getContext());
Entry.IsSExt = false;
Entry.IsZExt = true;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(
getLibcallName(RTLIB::FPEXT_F16_F32),
getPointerTy(DAG.getDataLayout()));
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
CallingConv::C, EVT(VT).getTypeForEVT(*DAG.getContext()), Callee,
std::move(Args));
SDValue Res;
std::tie(Res,Chain) = LowerCallTo(CLI);
if (IsStrict)
Res = DAG.getMergeValues({Res, Chain}, DL);
return Res;
}
In = DAG.getBitcast(MVT::i16, In);
In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i16,
getZeroVector(MVT::v8i16, Subtarget, DAG, DL), In,
DAG.getIntPtrConstant(0, DL));
SDValue Res;
if (IsStrict) {
Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, DL, {MVT::v4f32, MVT::Other},
{Chain, In});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(X86ISD::CVTPH2PS, DL, MVT::v4f32, In,
DAG.getTargetConstant(4, DL, MVT::i32));
}
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Res,
DAG.getIntPtrConstant(0, DL));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, DL);
return Res;
}
if (!SVT.isVector())
return Op;
if (SVT.getVectorElementType() == MVT::f16) {
assert(Subtarget.hasF16C() && "Unexpected features!");
if (SVT == MVT::v2f16)
In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In,
DAG.getUNDEF(MVT::v2f16));
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f16, In,
DAG.getUNDEF(MVT::v4f16));
if (IsStrict)
return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
{Op->getOperand(0), Res});
return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
} else if (VT == MVT::v4f64 || VT == MVT::v8f64) {
return Op;
}
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
SDValue Res =
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT));
if (IsStrict)
return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
{Op->getOperand(0), Res});
return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
}
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDLoc DL(Op);
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT VT = Op.getSimpleValueType();
MVT SVT = In.getSimpleValueType();
if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80))
return SDValue();
if (VT == MVT::f16 && (SVT == MVT::f64 || SVT == MVT::f32) &&
!Subtarget.hasFP16() && (SVT == MVT::f64 || !Subtarget.hasF16C())) {
if (!Subtarget.getTargetTriple().isOSDarwin())
return SDValue();
// We need a libcall, but the ABI for f16 libcalls on MacOS is soft-float.
TargetLowering::CallLoweringInfo CLI(DAG);
Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = In;
Entry.Ty = EVT(SVT).getTypeForEVT(*DAG.getContext());
Entry.IsSExt = false;
Entry.IsZExt = true;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(
getLibcallName(SVT == MVT::f64 ? RTLIB::FPROUND_F64_F16
: RTLIB::FPROUND_F32_F16),
getPointerTy(DAG.getDataLayout()));
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
CallingConv::C, EVT(MVT::i16).getTypeForEVT(*DAG.getContext()), Callee,
std::move(Args));
SDValue Res;
std::tie(Res, Chain) = LowerCallTo(CLI);
Res = DAG.getBitcast(MVT::f16, Res);
if (IsStrict)
Res = DAG.getMergeValues({Res, Chain}, DL);
return Res;
}
if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) {
if (!Subtarget.hasF16C() || SVT.getScalarType() != MVT::f32)
return SDValue();
if (VT.isVector())
return Op;
SDValue Res;
SDValue Rnd = DAG.getTargetConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, DL,
MVT::i32);
if (IsStrict) {
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4f32,
DAG.getConstantFP(0, DL, MVT::v4f32), In,
DAG.getIntPtrConstant(0, DL));
Res = DAG.getNode(X86ISD::STRICT_CVTPS2PH, DL, {MVT::v8i16, MVT::Other},
{Chain, Res, Rnd});
Chain = Res.getValue(1);
} else {
// FIXME: Should we use zeros for upper elements for non-strict?
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, In);
Res = DAG.getNode(X86ISD::CVTPS2PH, DL, MVT::v8i16, Res, Rnd);
}
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i16, Res,
DAG.getIntPtrConstant(0, DL));
Res = DAG.getBitcast(MVT::f16, Res);
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, DL);
return Res;
}
return Op;
}
static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) {
bool IsStrict = Op->isStrictFPOpcode();
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
assert(Src.getValueType() == MVT::i16 && Op.getValueType() == MVT::f32 &&
"Unexpected VT!");
SDLoc dl(Op);
SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16,
DAG.getConstant(0, dl, MVT::v8i16), Src,
DAG.getIntPtrConstant(0, dl));
SDValue Chain;
if (IsStrict) {
Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {MVT::v4f32, MVT::Other},
{Op.getOperand(0), Res});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res);
}
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
DAG.getIntPtrConstant(0, dl));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) {
bool IsStrict = Op->isStrictFPOpcode();
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
assert(Src.getValueType() == MVT::f32 && Op.getValueType() == MVT::i16 &&
"Unexpected VT!");
SDLoc dl(Op);
SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4f32,
DAG.getConstantFP(0, dl, MVT::v4f32), Src,
DAG.getIntPtrConstant(0, dl));
Res = DAG.getNode(
X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other},
{Op.getOperand(0), Res, DAG.getTargetConstant(4, dl, MVT::i32)});
Chain = Res.getValue(1);
} else {
// FIXME: Should we use zeros for upper elements for non-strict?
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, Src);
Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res,
DAG.getTargetConstant(4, dl, MVT::i32));
}
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Res,
DAG.getIntPtrConstant(0, dl));
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MakeLibCallOptions CallOptions;
RTLIB::Libcall LC =
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
SDValue Res =
makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16,
DAG.getBitcast(MVT::i32, Res));
}
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// If both operands have other uses, this is probably not profitable.
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
if (!LHS.hasOneUse() && !RHS.hasOneUse())
return Op;
// FP horizontal add/sub were added with SSE3. Integer with SSSE3.
bool IsFP = Op.getSimpleValueType().isFloatingPoint();
if (IsFP && !Subtarget.hasSSE3())
return Op;
if (!IsFP && !Subtarget.hasSSSE3())
return Op;
// Extract from a common vector.
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
LHS.getOperand(0) != RHS.getOperand(0) ||
!isa<ConstantSDNode>(LHS.getOperand(1)) ||
!isa<ConstantSDNode>(RHS.getOperand(1)) ||
!shouldUseHorizontalOp(true, DAG, Subtarget))
return Op;
// Allow commuted 'hadd' ops.
// TODO: Allow commuted (f)sub by negating the result of (F)HSUB?
unsigned HOpcode;
switch (Op.getOpcode()) {
case ISD::ADD: HOpcode = X86ISD::HADD; break;
case ISD::SUB: HOpcode = X86ISD::HSUB; break;
case ISD::FADD: HOpcode = X86ISD::FHADD; break;
case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
default:
llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
}
unsigned LExtIndex = LHS.getConstantOperandVal(1);
unsigned RExtIndex = RHS.getConstantOperandVal(1);
if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 &&
(HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
std::swap(LExtIndex, RExtIndex);
if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1))
return Op;
SDValue X = LHS.getOperand(0);
EVT VecVT = X.getValueType();
unsigned BitWidth = VecVT.getSizeInBits();
unsigned NumLanes = BitWidth / 128;
unsigned NumEltsPerLane = VecVT.getVectorNumElements() / NumLanes;
assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
"Not expecting illegal vector widths here");
// Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
// equivalent, so extract the 256/512-bit source op to 128-bit if we can.
SDLoc DL(Op);
if (BitWidth == 256 || BitWidth == 512) {
unsigned LaneIdx = LExtIndex / NumEltsPerLane;
X = extract128BitVector(X, LaneIdx * NumEltsPerLane, DAG, DL);
LExtIndex %= NumEltsPerLane;
}
// add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
// add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
// add (extractelt (X, 2), extractelt (X, 3)) --> extractelt (hadd X, X), 1
// sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
DAG.getIntPtrConstant(LExtIndex / 2, DL));
}
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
"Only expecting float/double");
return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
}
/// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
/// This mode isn't supported in hardware on X86. But as long as we aren't
/// compiling with trapping math, we can emulate this with
/// trunc(X + copysign(nextafter(0.5, 0.0), X)).
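/// An illustrative f32 instance: nextafter(0.5, 0.0) = 0.5 - 2^-25. For
/// X = 2.5 the FADD's exact sum 3 - 2^-25 rounds to 3.0, so FTRUNC gives 3
/// (ties away from zero); for X = 0.5 - 2^-25 a plain X + 0.5 would round
/// the halfway sum up to 1.0, while this adder keeps it at 1 - 2^-24.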
static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// N0 += copysign(nextafter(0.5, 0.0), N0)
const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
bool Ignored;
APFloat Point5Pred = APFloat(0.5f);
Point5Pred.convert(Sem, APFloat::rmNearestTiesToEven, &Ignored);
Point5Pred.next(/*nextDown*/true);
SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT,
DAG.getConstantFP(Point5Pred, dl, VT), N0);
N0 = DAG.getNode(ISD::FADD, dl, VT, N0, Adder);
// Truncate the result to remove fraction.
return DAG.getNode(ISD::FTRUNC, dl, VT, N0);
}
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
"Wrong opcode for lowering FABS or FNEG.");
bool IsFABS = (Op.getOpcode() == ISD::FABS);
// If this is a FABS and it has an FNEG user, bail out to fold the combination
// into an FNABS. We'll lower the FABS after that if it is still in use.
if (IsFABS)
for (SDNode *User : Op->uses())
if (User->getOpcode() == ISD::FNEG)
return Op;
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);
assert(VT.isFloatingPoint() && VT != MVT::f80 &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Unexpected type in LowerFABSorFNEG");
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
// Using a 16-byte mask allows folding the load of the mask with
// the logic op, so it can save (~4 bytes) on code size.
bool IsFakeVector = !VT.isVector() && !IsF128;
MVT LogicVT = VT;
if (IsFakeVector)
LogicVT = (VT == MVT::f64) ? MVT::v2f64
: (VT == MVT::f32) ? MVT::v4f32
: MVT::v8f16;
unsigned EltBits = VT.getScalarSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
APInt::getSignMask(EltBits);
const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
unsigned LogicOp = IsFABS ? X86ISD::FAND :
IsFNABS ? X86ISD::FOR :
X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (VT.isVector() || IsF128)
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
// For the scalar case extend to a 128-bit vector, perform the logic op,
// and extract the scalar result back out.
Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand);
SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,
DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue Mag = Op.getOperand(0);
SDValue Sign = Op.getOperand(1);
SDLoc dl(Op);
// If the sign operand is smaller, extend it first.
MVT VT = Op.getSimpleValueType();
if (Sign.getSimpleValueType().bitsLT(VT))
Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);
// And if it is bigger, shrink it first.
if (Sign.getSimpleValueType().bitsGT(VT))
Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign,
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
bool IsF128 = (VT == MVT::f128);
assert(VT.isFloatingPoint() && VT != MVT::f80 &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Unexpected type in LowerFCOPYSIGN");
const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
// Perform all scalar logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE.
// TODO: This isn't necessary. If we used scalar types, we might avoid some
// unnecessary splats, but we might miss load folding opportunities. Should
// this decision be based on OptimizeForSize?
bool IsFakeVector = !VT.isVector() && !IsF128;
MVT LogicVT = VT;
if (IsFakeVector)
LogicVT = (VT == MVT::f64) ? MVT::v2f64
: (VT == MVT::f32) ? MVT::v4f32
: MVT::v8f16;
// The mask constants are automatically splatted for vector types.
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue SignMask = DAG.getConstantFP(
APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
SDValue MagMask = DAG.getConstantFP(
APFloat(Sem, APInt::getSignedMaxValue(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
if (IsFakeVector)
Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);
// Next, clear the sign bit from the first operand (magnitude).
// TODO: If we had general constant folding for FP logic ops, this check
// wouldn't be necessary.
SDValue MagBits;
if (ConstantFPSDNode *Op0CN = isConstOrConstSplatFP(Mag)) {
APFloat APF = Op0CN->getValueAPF();
APF.clearSign();
MagBits = DAG.getConstantFP(APF, dl, LogicVT);
} else {
// If the magnitude operand wasn't a constant, we need to AND out the sign.
if (IsFakeVector)
Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag);
MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask);
}
// OR the magnitude value with the sign bit.
SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,
DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT OpVT = N0.getSimpleValueType();
assert((OpVT == MVT::f32 || OpVT == MVT::f64) &&
"Unexpected type for FGETSIGN");
// Lower ISD::FGETSIGN to (AND (X86ISD::MOVMSK ...) 1).
MVT VecVT = (OpVT == MVT::f32 ? MVT::v4f32 : MVT::v2f64);
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, N0);
Res = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, Res);
Res = DAG.getZExtOrTrunc(Res, dl, VT);
Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT));
return Res;
}
/// Helper for attempting to create a X86ISD::BT node.
static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL, SelectionDAG &DAG) {
// If Src is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
// Also promote i16 to i32 for performance / code size reasons.
if (Src.getValueType().getScalarSizeInBits() < 32)
Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
// No legal type found, give up.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
return SDValue();
// See if we can use the 32-bit instruction instead of the 64-bit one for a
// shorter encoding. Since the former takes the modulo 32 of BitNo and the
// latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
// known to be zero.
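// E.g. (illustrative) if BitNo was produced by (idx & 31), bit 5 of BitNo
// is known zero, so BT32 (which uses BitNo mod 32) tests the same bit as
// BT64 (mod 64) and the shorter 32-bit encoding is safe.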
if (Src.getValueType() == MVT::i64 &&
DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
Src = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
// If the operand types disagree, extend the shift amount to match. Since
// BT ignores high bits (like shifts) we can use anyextend.
if (Src.getValueType() != BitNo.getValueType()) {
// Peek through a mask/modulo operation.
// TODO: DAGCombine fails to do this as it just checks isTruncateFree, but
// we probably need a better IsDesirableToPromoteOp to handle this as well.
if (BitNo.getOpcode() == ISD::AND && BitNo->hasOneUse())
BitNo = DAG.getNode(ISD::AND, DL, Src.getValueType(),
DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(),
BitNo.getOperand(0)),
DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(),
BitNo.getOperand(1)));
else
BitNo = DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(), BitNo);
}
return DAG.getNode(X86ISD::BT, DL, MVT::i32, Src, BitNo);
}
/// Helper for creating a X86ISD::SETCC node.
static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
SelectionDAG &DAG) {
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getTargetConstant(Cond, dl, MVT::i8), EFLAGS);
}
/// Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...))
/// style scalarized (associative) reduction patterns. Partial reductions
/// are supported when the pointer SrcMask is non-null.
/// TODO - move this to SelectionDAG?
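/// For instance (illustrative), with X : v4i32 the tree
/// or(extractelt(X,0), or(extractelt(X,1), or(extractelt(X,2),
/// extractelt(X,3)))) matches with SrcOps = {X} and all four lanes used.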
static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
SmallVectorImpl<SDValue> &SrcOps,
SmallVectorImpl<APInt> *SrcMask = nullptr) {
SmallVector<SDValue, 8> Opnds;
DenseMap<SDValue, APInt> SrcOpMap;
EVT VT = MVT::Other;
// Recognize a special case where a vector is cast into a wide integer to
// test for all 0s.
assert(Op.getOpcode() == unsigned(BinOp) &&
"Unexpected bit reduction opcode");
Opnds.push_back(Op.getOperand(0));
Opnds.push_back(Op.getOperand(1));
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
// BFS traverse all BinOp operands.
if (I->getOpcode() == unsigned(BinOp)) {
Opnds.push_back(I->getOperand(0));
Opnds.push_back(I->getOperand(1));
// Re-evaluate the number of nodes to be traversed.
e += 2; // 2 more nodes (LHS and RHS) are pushed.
continue;
}
// Quit if this is not an EXTRACT_VECTOR_ELT.
if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
// Quit if the index is not a constant.
auto *Idx = dyn_cast<ConstantSDNode>(I->getOperand(1));
if (!Idx)
return false;
SDValue Src = I->getOperand(0);
DenseMap<SDValue, APInt>::iterator M = SrcOpMap.find(Src);
if (M == SrcOpMap.end()) {
VT = Src.getValueType();
// Quit if not the same type.
if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType())
return false;
unsigned NumElts = VT.getVectorNumElements();
APInt EltCount = APInt::getZero(NumElts);
M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first;
SrcOps.push_back(Src);
}
// Quit if element already used.
unsigned CIdx = Idx->getZExtValue();
if (M->second[CIdx])
return false;
M->second.setBit(CIdx);
}
if (SrcMask) {
// Collect the source partial masks.
for (SDValue &SrcOp : SrcOps)
SrcMask->push_back(SrcOpMap[SrcOp]);
} else {
// Quit if not all elements are used.
for (const auto &I : SrcOpMap)
if (!I.second.isAllOnes())
return false;
}
return true;
}
// Helper function for comparing all bits of a vector against zero.
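// Strategy sketch (illustrative): OR wider sources down to a single 128- or
// 256-bit vector, then either PTEST(V, V), whose ZF is set iff V == 0, or,
// pre-SSE41, PCMPEQB against zero followed by MOVMSK == 0xFFFF.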
static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
const APInt &Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG, X86::CondCode &X86CC) {
EVT VT = V.getValueType();
unsigned ScalarSize = VT.getScalarSizeInBits();
if (Mask.getBitWidth() != ScalarSize) {
assert(ScalarSize == 1 && "Element Mask vs Vector bitwidth mismatch");
return SDValue();
}
assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE);
auto MaskBits = [&](SDValue Src) {
if (Mask.isAllOnes())
return Src;
EVT SrcVT = Src.getValueType();
SDValue MaskValue = DAG.getConstant(Mask, DL, SrcVT);
return DAG.getNode(ISD::AND, DL, SrcVT, Src, MaskValue);
};
// For sub-128-bit vector, cast to (legal) integer and compare with zero.
if (VT.getSizeInBits() < 128) {
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (!DAG.getTargetLoweringInfo().isTypeLegal(IntVT))
return SDValue();
return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
DAG.getBitcast(IntVT, MaskBits(V)),
DAG.getConstant(0, DL, IntVT));
}
// Quit if not splittable to 128/256-bit vector.
if (!isPowerOf2_32(VT.getSizeInBits()))
return SDValue();
// Split down to 128/256-bit vector.
unsigned TestSize = Subtarget.hasAVX() ? 256 : 128;
while (VT.getSizeInBits() > TestSize) {
auto Split = DAG.SplitVector(V, DL);
VT = Split.first.getValueType();
V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
}
bool UsePTEST = Subtarget.hasSSE41();
if (UsePTEST) {
MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
V = DAG.getBitcast(TestVT, MaskBits(V));
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V);
}
// Without PTEST, a masked v2i64 or-reduction is not faster than
// scalarization.
if (!Mask.isAllOnes() && VT.getScalarSizeInBits() > 32)
return SDValue();
V = DAG.getBitcast(MVT::v16i8, MaskBits(V));
V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V,
getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V,
DAG.getConstant(0xFFFF, DL, MVT::i32));
}
// Check whether an OR'd reduction tree is PTEST-able, or if we can fall back
// CMP(MOVMSK(PCMPEQB(X,0))).
static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &X86CC) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
if (!Subtarget.hasSSE2() || !Op->hasOneUse())
return SDValue();
// Check whether we're masking/truncating an OR-reduction result, in which
// case track the masked bits.
APInt Mask = APInt::getAllOnes(Op.getScalarValueSizeInBits());
switch (Op.getOpcode()) {
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(),
Op.getScalarValueSizeInBits());
Op = Src;
break;
}
case ISD::AND: {
if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Mask = Cst->getAPIntValue();
Op = Op.getOperand(0);
}
break;
}
}
SmallVector<SDValue, 8> VecIns;
if (Op.getOpcode() == ISD::OR && matchScalarReduction(Op, ISD::OR, VecIns)) {
EVT VT = VecIns[0].getValueType();
assert(llvm::all_of(VecIns,
[VT](SDValue V) { return VT == V.getValueType(); }) &&
"Reduction source vector mismatch");
// Quit if less than 128-bits or not splittable to 128/256-bit vector.
if (VT.getSizeInBits() < 128 || !isPowerOf2_32(VT.getSizeInBits()))
return SDValue();
// If more than one full vector is evaluated, OR them first before PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;
Slot += 2, e += 1) {
// Each iteration will OR 2 nodes and append the result until there is
// only 1 node left, i.e. the final OR'd value of all vectors.
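// E.g. starting from [A,B,C,D]: append A|B, then C|D, then (A|B)|(C|D);
// VecIns.back() ends up holding the OR of all source vectors.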
SDValue LHS = VecIns[Slot];
SDValue RHS = VecIns[Slot + 1];
VecIns.push_back(DAG.getNode(ISD::OR, DL, VT, LHS, RHS));
}
X86::CondCode CCode;
if (SDValue V = LowerVectorAllZero(DL, VecIns.back(), CC, Mask, Subtarget,
DAG, CCode)) {
X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
return V;
}
}
if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ISD::NodeType BinOp;
if (SDValue Match =
DAG.matchBinOpReduction(Op.getNode(), BinOp, {ISD::OR})) {
X86::CondCode CCode;
if (SDValue V =
LowerVectorAllZero(DL, Match, CC, Mask, Subtarget, DAG, CCode)) {
X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
return V;
}
}
}
return SDValue();
}
/// Return true if \c Op has a use that doesn't just read flags.
static bool hasNonFlagsUse(SDValue Op) {
for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;
++UI) {
SDNode *User = *UI;
unsigned UOpNo = UI.getOperandNo();
if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
// Look past the truncate.
UOpNo = User->use_begin().getOperandNo();
User = *User->use_begin();
}
if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&
!(User->getOpcode() == ISD::SELECT && UOpNo == 0))
return true;
}
return false;
}
// Transform to an x86-specific ALU node with flags if there is a chance of
// using an RMW op, or if only the flags are used. Otherwise, leave
// the node alone and emit a 'cmp' or 'test' instruction.
static bool isProfitableToUseFlagOp(SDValue Op) {
for (SDNode *U : Op->uses())
if (U->getOpcode() != ISD::CopyToReg &&
U->getOpcode() != ISD::SETCC &&
U->getOpcode() != ISD::STORE)
return false;
return true;
}
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
break;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
case X86::COND_O: case X86::COND_NO: {
// Check if we really need to set the Overflow flag. If NoSignedWrap is
// present, it is not actually needed.
switch (Op->getOpcode()) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::SHL:
if (Op.getNode()->getFlags().hasNoSignedWrap())
break;
[[fallthrough]];
default:
NeedOF = true;
break;
}
break;
}
}
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
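// E.g. if Op is (add x, y) and only ZF/SF are needed, the code below reuses
// or creates an EFLAGS-producing node instead of emitting a separate TEST.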
if (Op.getResNo() != 0 || NeedOF || NeedCF) {
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
}
unsigned Opcode = 0;
unsigned NumOperands = 0;
SDValue ArithOp = Op;
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation,
// which may be the result of a CAST. We use the variable 'Op', the
// non-casted value, when we check for possible users.
switch (ArithOp.getOpcode()) {
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
// because a TEST instruction will be better.
if (!hasNonFlagsUse(Op))
break;
[[fallthrough]];
case ISD::ADD:
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
if (!isProfitableToUseFlagOp(Op))
break;
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: Opcode = X86ISD::OR; break;
}
NumOperands = 2;
break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
return SDValue(Op.getNode(), 1);
case ISD::SSUBO:
case ISD::USUBO: {
// USUBO/SSUBO will become an X86ISD::SUB and we can use its Z flag.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0),
Op->getOperand(1)).getValue(1);
}
default:
break;
}
if (Opcode == 0) {
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
}
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);
SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), New);
return SDValue(New.getNode(), 1);
}
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
const SDLoc &dl, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (isNullConstant(Op1))
return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
EVT CmpVT = Op0.getValueType();
assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
// Only promote the compare up to i32 if it is a 16-bit operation
// with an immediate; 16-bit immediates are to be avoided.
if (CmpVT == MVT::i16 && !Subtarget.isAtom() &&
!DAG.getMachineFunction().getFunction().hasMinSize()) {
ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
// Don't do this if the immediate can fit in 8-bits.
if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
(COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
unsigned ExtendOp =
isX86CCSigned(X86CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
if (X86CC == X86::COND_E || X86CC == X86::COND_NE) {
// For equality comparisons try to use SIGN_EXTEND if the input was
// truncate from something with enough sign bits.
if (Op0.getOpcode() == ISD::TRUNCATE) {
if (DAG.ComputeMaxSignificantBits(Op0.getOperand(0)) <= 16)
ExtendOp = ISD::SIGN_EXTEND;
} else if (Op1.getOpcode() == ISD::TRUNCATE) {
if (DAG.ComputeMaxSignificantBits(Op1.getOperand(0)) <= 16)
ExtendOp = ISD::SIGN_EXTEND;
}
}
CmpVT = MVT::i32;
Op0 = DAG.getNode(ExtendOp, dl, CmpVT, Op0);
Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1);
}
}
// Try to shrink i64 compares if the input has enough zero bits.
// FIXME: Do this for non-constant compares for constant on LHS?
if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 &&
DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
CmpVT = MVT::i32;
Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
}
// 0-x == y --> x+y == 0
// 0-x != y --> x+y != 0
if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op0.getOperand(0)) &&
Op0.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(1), Op1);
return Add.getValue(1);
}
// x == 0-y --> x+y == 0
// x != 0-y --> x+y != 0
if (Op1.getOpcode() == ISD::SUB && isNullConstant(Op1.getOperand(0)) &&
Op1.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0, Op1.getOperand(1));
return Add.getValue(1);
}
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
return Sub.getValue(1);
}
/// Check if replacement of SQRT with RSQRT should be disabled.
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// We don't need to replace SQRT with RSQRT for half type.
if (VT.getScalarType() == MVT::f16)
return true;
// We never want to use both SQRT and RSQRT instructions for the same input.
if (DAG.doesNodeExist(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
return false;
if (VT.isVector())
return Subtarget.hasFastVectorFSQRT();
return Subtarget.hasFastScalarFSQRT();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
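/// (The refinement applied by the generic combiner is roughly
/// Est' = Est * (1.5 - 0.5 * Op * Est * Est) per step.)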
SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// It is likely not profitable to do this for f64 because a double-precision
// rsqrt estimate with refinement on x86 prior to FMA requires at least 16
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
// TODO: SQRT requires SSE2 to prevent the introduction of an illegal v4i32
// after legalize types.
if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) ||
(VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) ||
(VT == MVT::v8f32 && Subtarget.hasAVX()) ||
(VT == MVT::v16f32 && Subtarget.useAVX512Regs())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 1;
UseOneConstNR = false;
// There is no FSQRT for 512-bits, but there is RSQRT14.
unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT;
SDValue Estimate = DAG.getNode(Opcode, DL, VT, Op);
if (RefinementSteps == 0 && !Reciprocal)
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Op, Estimate);
return Estimate;
}
if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
Subtarget.hasFP16()) {
assert(Reciprocal && "Don't replace SQRT with RSQRT for half type");
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 0;
if (VT == MVT::f16) {
SDValue Zero = DAG.getIntPtrConstant(0, DL);
SDValue Undef = DAG.getUNDEF(MVT::v8f16);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, Op);
Op = DAG.getNode(X86ISD::RSQRT14S, DL, MVT::v8f16, Undef, Op);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Op, Zero);
}
return DAG.getNode(X86ISD::RSQRT14, DL, VT, Op);
}
return SDValue();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
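/// (Each refinement step for the reciprocal is roughly
/// Est' = Est * (2 - Op * Est).)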
SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
int Enabled,
int &RefinementSteps) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// It is likely not profitable to do this for f64 because a double-precision
// reciprocal estimate with refinement on x86 prior to FMA requires
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v8f32 && Subtarget.hasAVX()) ||
(VT == MVT::v16f32 && Subtarget.useAVX512Regs())) {
// Enable estimate codegen with 1 refinement step for vector division.
// Scalar division estimates are disabled because they break too much
// real-world code. These defaults are intended to match GCC behavior.
if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
return SDValue();
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 1;
// There is no FSQRT for 512-bits, but there is RCP14.
unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP;
return DAG.getNode(Opcode, DL, VT, Op);
}
if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
Subtarget.hasFP16()) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 0;
if (VT == MVT::f16) {
SDValue Zero = DAG.getIntPtrConstant(0, DL);
SDValue Undef = DAG.getUNDEF(MVT::v8f16);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, Op);
Op = DAG.getNode(X86ISD::RCP14S, DL, MVT::v8f16, Undef, Op);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Op, Zero);
}
return DAG.getNode(X86ISD::RCP14, DL, VT, Op);
}
return SDValue();
}
/// If we have at least two divisions that use the same divisor, convert to
/// multiplication by a reciprocal. This may need to be adjusted for a given
/// CPU if a division's cost is not at least twice the cost of a multiplication.
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
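/// E.g. a/d and b/d become r = 1.0/d; a*r; b*r - one divide and two
/// multiplies instead of two divides.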
unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
SDValue
X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
"Unexpected divisor!");
// Only perform this transform if CMOV is supported otherwise the select
// below will become a branch.
if (!Subtarget.canUseCMOV())
return SDValue();
// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
// FIXME: Support i8.
if (VT != MVT::i16 && VT != MVT::i32 &&
!(Subtarget.is64Bit() && VT == MVT::i64))
return SDValue();
unsigned Lg2 = Divisor.countTrailingZeros();
// If the divisor is 2 or -2, the default expansion is better.
if (Lg2 == 1)
return SDValue();
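// Illustrative sequence for X / 8 (Lg2 == 3) with CMOV available:
//   Add = X + 7; CMov = (X < 0) ? Add : X; result = CMov >> 3 (arithmetic),
// with a final 0 - result when the divisor is negative.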
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue Zero = DAG.getConstant(0, DL, VT);
APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(CMov.getNode());
// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;
Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node and the condition code needed to use it.
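/// E.g. (and X, (shl 1, N)) == 0 becomes BT X, N with condition AE (CF == 0),
/// while != 0 uses condition B (CF == 1).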
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
SelectionDAG &DAG, X86::CondCode &X86CC) {
assert(And.getOpcode() == ISD::AND && "Expected AND node!");
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
if (Op1.getOpcode() == ISD::TRUNCATE)
Op1 = Op1.getOperand(0);
SDValue Src, BitNo;
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
if (isOneConstant(Op0.getOperand(0))) {
// If we looked past a truncate, check that it's only truncating away
// known zeros.
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
KnownBits Known = DAG.computeKnownBits(Op0);
if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
return SDValue();
}
Src = Op1;
BitNo = Op0.getOperand(1);
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
uint64_t AndRHSVal = AndRHS->getZExtValue();
SDValue AndLHS = Op0;
if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
Src = AndLHS.getOperand(0);
BitNo = AndLHS.getOperand(1);
} else {
// Use BT if the immediate can't be encoded in a TEST instruction or we
// are optimizing for size and the immediate won't fit in a byte.
bool OptForSize = DAG.shouldOptForSize();
if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
isPowerOf2_64(AndRHSVal)) {
Src = AndLHS;
BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
Src.getValueType());
}
}
}
// No patterns found, give up.
if (!Src.getNode())
return SDValue();
// Remove any bit flip.
if (isBitwiseNot(Src)) {
Src = Src.getOperand(0);
CC = CC == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
}
// Attempt to create the X86ISD::BT node.
if (SDValue BT = getBT(Src, BitNo, dl, DAG)) {
X86CC = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return BT;
}
return SDValue();
}
// Check if pre-AVX condcode can be performed by a single FCMP op.
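// (SETONE and SETUEQ each require two compares plus a logic op to combine
// them; see the two-compare expansion in LowerVSETCC.)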
static bool cheapX86FSETCC_SSE(ISD::CondCode SetCCOpcode) {
return (SetCCOpcode != ISD::SETONE) && (SetCCOpcode != ISD::SETUEQ);
}
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
/// CMPs.
static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
SDValue &Op1, bool &IsAlwaysSignaling) {
unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
// 0 - EQ
// 1 - LT
// 2 - LE
// 3 - UNORD
// 4 - NEQ
// 5 - NLT
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETGT: Swap = true; [[fallthrough]];
case ISD::SETLT:
case ISD::SETOLT: SSECC = 1; break;
case ISD::SETOGE:
case ISD::SETGE: Swap = true; [[fallthrough]];
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
case ISD::SETULE: Swap = true; [[fallthrough]];
case ISD::SETUGE: SSECC = 5; break;
case ISD::SETULT: Swap = true; [[fallthrough]];
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
case ISD::SETUEQ: SSECC = 8; break;
case ISD::SETONE: SSECC = 12; break;
}
if (Swap)
std::swap(Op0, Op1);
switch (SetCCOpcode) {
default:
IsAlwaysSignaling = true;
break;
case ISD::SETEQ:
case ISD::SETOEQ:
case ISD::SETUEQ:
case ISD::SETNE:
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETO:
case ISD::SETUO:
IsAlwaysSignaling = false;
break;
}
return SSECC;
}
/// Break a 256-bit integer VSETCC into two new 128-bit ones and then
/// concatenate the result back.
static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS,
ISD::CondCode Cond, SelectionDAG &DAG,
const SDLoc &dl) {
assert(VT.isInteger() && VT == LHS.getValueType() &&
VT == RHS.getValueType() && "Unsupported VTs!");
SDValue CC = DAG.getCondCode(Cond);
// Extract the LHS Lo/Hi vectors
SDValue LHS1, LHS2;
std::tie(LHS1, LHS2) = splitVector(LHS, DAG, dl);
// Extract the RHS Lo/Hi vectors
SDValue RHS1, RHS2;
std::tie(RHS1, RHS2) = splitVector(RHS, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(ISD::SETCC, dl, LoVT, LHS1, RHS1, CC),
DAG.getNode(ISD::SETCC, dl, HiVT, LHS2, RHS2, CC));
}
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert(VT.getVectorElementType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
// Prefer SETGT over SETLT.
if (SetCCOpcode == ISD::SETLT) {
SetCCOpcode = ISD::getSetCCSwappedOperands(SetCCOpcode);
std::swap(Op0, Op1);
}
return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
}
/// Given a buildvector constant, return a new vector constant with each element
/// incremented or decremented. If incrementing or decrementing would result in
/// unsigned overflow or underflow or this is not a simple vector constant,
/// return an empty value.
static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
if (!BV)
return SDValue();
MVT VT = V.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> NewVecC;
SDLoc DL(V);
for (unsigned i = 0; i < NumElts; ++i) {
auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
return SDValue();
// Avoid overflow/underflow.
const APInt &EltC = Elt->getAPIntValue();
if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isZero()))
return SDValue();
NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
}
return DAG.getBuildVector(VT, DL, NewVecC);
}
/// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
/// Op0 u<= Op1:
/// t = psubus Op0, Op1
/// pcmpeq t, <0..0>
static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
ISD::CondCode Cond, const SDLoc &dl,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (!Subtarget.hasSSE2())
return SDValue();
MVT VET = VT.getVectorElementType();
if (VET != MVT::i8 && VET != MVT::i16)
return SDValue();
switch (Cond) {
default:
return SDValue();
case ISD::SETULT: {
// If the comparison is against a constant we can turn this into a
// setule. With psubus, setule does not require a swap. This is
// beneficial because the constant in the register is no longer
// clobbered as the destination, so it can be hoisted out of a loop.
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
break;
}
case ISD::SETUGT: {
// If the comparison is against a constant, we can turn this into a setuge.
// This is beneficial because materializing a constant 0 for the PCMPEQ is
// probably cheaper than XOR+PCMPGT using 2 different vector constants:
// cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true);
if (!UGEOp1)
return SDValue();
Op1 = Op0;
Op0 = UGEOp1;
break;
}
// Psubus is better than flip-sign because it requires no inversion.
case ISD::SETUGE:
std::swap(Op0, Op1);
break;
case ISD::SETULE:
break;
}
SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
DAG.getConstant(0, dl, VT));
}
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
Op.getOpcode() == ISD::STRICT_FSETCCS;
SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
SDValue CC = Op.getOperand(IsStrict ? 3 : 2);
MVT VT = Op->getSimpleValueType(0);
ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op1.getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
if (isFP) {
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64);
if (isSoftFP16(EltVT, Subtarget))
return SDValue();
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
// If we have a strict compare with a vXi1 result and the input is 128/256
// bits we can't use a masked compare unless we have VLX. If we use a wider
// compare like we do for non-strict, we might trigger spurious exceptions
// from the upper elements. Instead emit an AVX compare and convert to mask.
unsigned Opc;
if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&
(!IsStrict || Subtarget.hasVLX() ||
Op0.getSimpleValueType().is512BitVector())) {
#ifndef NDEBUG
unsigned Num = VT.getVectorNumElements();
assert(Num <= 16 || (Num == 32 && EltVT == MVT::f16));
#endif
Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
} else {
Opc = IsStrict ? X86ISD::STRICT_CMPP : X86ISD::CMPP;
// The SSE/AVX packed FP comparison nodes are defined with a
// floating-point vector result that matches the operand type. This allows
// them to work with an SSE1 target (integer vector types are not legal).
VT = Op0.getSimpleValueType();
}
SDValue Cmp;
bool IsAlwaysSignaling;
unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1, IsAlwaysSignaling);
if (!Subtarget.hasAVX()) {
// TODO: We could use the following steps to handle a quiet compare with
// signaling encodings.
// 1. Get ordered masks from a quiet ISD::SETO
// 2. Use the masks to mask potential unordered elements in operands A and B
// 3. Get the compare results of masked A, B
// 4. Calculate the final result using the mask and the result from 3
// But currently, we just fall back to scalar operations.
if (IsStrict && IsAlwaysSignaling && !IsSignaling)
return SDValue();
// Insert an extra signaling instruction to raise exception.
if (IsStrict && !IsAlwaysSignaling && IsSignaling) {
SDValue SignalCmp = DAG.getNode(
Opc, dl, {VT, MVT::Other},
{Chain, Op0, Op1, DAG.getTargetConstant(1, dl, MVT::i8)}); // LT_OS
// FIXME: It seems we need to update the flags of all new strict nodes.
// Otherwise, mayRaiseFPException in MI will return false due to
// NoFPExcept = false by default. However, I didn't find it in other
// patches.
SignalCmp->setFlags(Op->getFlags());
Chain = SignalCmp.getValue(1);
}
// In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
// emit two comparisons and a logic op to tie them together.
if (!cheapX86FSETCC_SSE(Cond)) {
// LLVM predicate is SETUEQ or SETONE.
unsigned CC0, CC1;
unsigned CombineOpc;
if (Cond == ISD::SETUEQ) {
CC0 = 3; // UNORD
CC1 = 0; // EQ
CombineOpc = X86ISD::FOR;
} else {
assert(Cond == ISD::SETONE);
CC0 = 7; // ORD
CC1 = 4; // NEQ
CombineOpc = X86ISD::FAND;
}
SDValue Cmp0, Cmp1;
if (IsStrict) {
Cmp0 = DAG.getNode(
Opc, dl, {VT, MVT::Other},
{Chain, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)});
Cmp1 = DAG.getNode(
Opc, dl, {VT, MVT::Other},
{Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)});
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1),
Cmp1.getValue(1));
} else {
Cmp0 = DAG.getNode(
Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8));
Cmp1 = DAG.getNode(
Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8));
}
Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
if (IsStrict) {
Cmp = DAG.getNode(
Opc, dl, {VT, MVT::Other},
{Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
Chain = Cmp.getValue(1);
} else
Cmp = DAG.getNode(
Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
} else {
// Handle all other FP comparisons here.
if (IsStrict) {
// Set the signaling bit (bit 4) of the AVX CC when the predicate's default
// quiet/signaling behavior differs from the requested one.
SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4;
Cmp = DAG.getNode(
Opc, dl, {VT, MVT::Other},
{Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
Chain = Cmp.getValue(1);
} else
Cmp = DAG.getNode(
Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
if (VT.getFixedSizeInBits() >
Op.getSimpleValueType().getFixedSizeInBits()) {
// We emitted a compare with an XMM/YMM result. Finish converting to a
// mask register using a vptestm.
EVT CastVT = EVT(VT).changeVectorElementTypeToInteger();
Cmp = DAG.getBitcast(CastVT, Cmp);
Cmp = DAG.getSetCC(dl, Op.getSimpleValueType(), Cmp,
DAG.getConstant(0, dl, CastVT), ISD::SETNE);
} else {
// If this is SSE/AVX CMPP, bitcast the result back to integer to match
// the result type of SETCC. The bitcast is expected to be optimized
// away during combining/isel.
Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
}
if (IsStrict)
return DAG.getMergeValues({Cmp, Chain}, dl);
return Cmp;
}
assert(!IsStrict && "Strict SETCC only handles FP operands.");
MVT VTOp0 = Op0.getSimpleValueType();
(void)VTOp0;
assert(VTOp0 == Op1.getSimpleValueType() &&
"Expected operands with same type!");
assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
"Invalid number of packed elements for source and destination!");
// The non-AVX512 code below works under the assumption that source and
// destination types are the same.
assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
"Value types for source and destination must be the same!");
// The result is boolean, but operands are int/float
if (VT.getVectorElementType() == MVT::i1) {
// In the AVX-512 architecture, setcc returns a mask with i1 elements,
// but there is no compare instruction for i8 and i16 elements in KNL.
assert((VTOp0.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
"Unexpected operand type");
return LowerIntVSETCC_AVX512(Op, DAG);
}
// Lower using XOP integer comparisons.
if (VT.is128BitVector() && Subtarget.hasXOP()) {
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
switch (Cond) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETULT:
case ISD::SETLT: CmpMode = 0x00; break;
case ISD::SETULE:
case ISD::SETLE: CmpMode = 0x01; break;
case ISD::SETUGT:
case ISD::SETGT: CmpMode = 0x02; break;
case ISD::SETUGE:
case ISD::SETGE: CmpMode = 0x03; break;
case ISD::SETEQ: CmpMode = 0x04; break;
case ISD::SETNE: CmpMode = 0x05; break;
}
// Are we comparing unsigned or signed integers?
unsigned Opc =
ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getTargetConstant(CmpMode, dl, MVT::i8));
}
// (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2.
// Revert part of the simplifySetCCWithAnd combine, to avoid an invert.
if (Cond == ISD::SETNE && ISD::isBuildVectorAllZeros(Op1.getNode())) {
SDValue BC0 = peekThroughBitcasts(Op0);
if (BC0.getOpcode() == ISD::AND) {
APInt UndefElts;
SmallVector<APInt, 64> EltBits;
if (getTargetConstantBitsFromNode(BC0.getOperand(1),
VT.getScalarSizeInBits(), UndefElts,
EltBits, false, false)) {
if (llvm::all_of(EltBits, [](APInt &V) { return V.isPowerOf2(); })) {
Cond = ISD::SETEQ;
Op1 = DAG.getBitcast(VT, BC0.getOperand(1));
}
}
}
}
// ICMP_EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2.
if (Cond == ISD::SETEQ && Op0.getOpcode() == ISD::AND &&
Op0.getOperand(1) == Op1 && Op0.hasOneUse()) {
ConstantSDNode *C1 = isConstOrConstSplat(Op1);
if (C1 && C1->getAPIntValue().isPowerOf2()) {
unsigned BitWidth = VT.getScalarSizeInBits();
unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1;
SDValue Result = Op0.getOperand(0);
Result = DAG.getNode(ISD::SHL, dl, VT, Result,
DAG.getConstant(ShiftAmt, dl, VT));
Result = DAG.getNode(ISD::SRA, dl, VT, Result,
DAG.getConstant(BitWidth - 1, dl, VT));
return Result;
}
}
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
// Break 512-bit integer vector compare into smaller ones.
// TODO: Try harder to use VPCMPx + VPMOV2x?
if (VT.is512BitVector())
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
// If we have a limit constant, try to form PCMPGT (signed cmp) to avoid
// not-of-PCMPEQ:
// X != INT_MIN --> X >s INT_MIN
// X != INT_MAX --> X <s INT_MAX --> INT_MAX >s X
// +X != 0 --> +X >s 0
APInt ConstValue;
if (Cond == ISD::SETNE &&
ISD::isConstantSplatVector(Op1.getNode(), ConstValue)) {
if (ConstValue.isMinSignedValue())
Cond = ISD::SETGT;
else if (ConstValue.isMaxSignedValue())
Cond = ISD::SETLT;
else if (ConstValue.isZero() && DAG.SignBitIsZero(Op0))
Cond = ISD::SETGT;
}
// If both operands are known non-negative, then an unsigned compare is the
// same as a signed compare and there's no need to flip signbits.
// TODO: We could check for more general simplifications here since we're
// computing known bits.
bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
!(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
// Special case: Use min/max operations for unsigned compares.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (ISD::isUnsignedIntSetCC(Cond) &&
(FlipSigns || ISD::isTrueWhenEqual(Cond)) &&
TLI.isOperationLegal(ISD::UMIN, VT)) {
// If we have a constant operand, increment/decrement it and change the
// condition to avoid an invert.
if (Cond == ISD::SETUGT) {
// X > C --> X >= (C+1) --> X == umax(X, C+1)
if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true)) {
Op1 = UGTOp1;
Cond = ISD::SETUGE;
}
}
if (Cond == ISD::SETULT) {
// X < C --> X <= (C-1) --> X == umin(X, C-1)
if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false)) {
Op1 = ULTOp1;
Cond = ISD::SETULE;
}
}
bool Invert = false;
unsigned Opc;
switch (Cond) {
default: llvm_unreachable("Unexpected condition code");
case ISD::SETUGT: Invert = true; [[fallthrough]];
case ISD::SETULE: Opc = ISD::UMIN; break;
case ISD::SETULT: Invert = true; [[fallthrough]];
case ISD::SETUGE: Opc = ISD::UMAX; break;
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);
// If the logical-not of the result is required, perform that now.
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
return Result;
}
// Try to use SUBUS and PCMPEQ.
if (FlipSigns)
if (SDValue V =
LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
return V;
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integers, swapping operands and multiple
// operations may be required for some comparisons.
unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
: X86ISD::PCMPGT;
bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
Cond == ISD::SETGE || Cond == ISD::SETUGE;
bool Invert = Cond == ISD::SETNE ||
(Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
if (Swap)
std::swap(Op0, Op1);
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
assert(Subtarget.hasSSE2() && "Don't know how to lower!");
// Special case for sign bit test. We can use a v4i32 PCMPGT and shuffle
// the odd elements over the even elements.
if (!FlipSigns && !Invert && ISD::isBuildVectorAllZeros(Op0.getNode())) {
Op0 = DAG.getConstant(0, dl, MVT::v4i32);
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
static const int MaskHi[] = { 1, 1, 3, 3 };
SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
return DAG.getBitcast(VT, Result);
}
if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) {
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
Op1 = DAG.getConstant(-1, dl, MVT::v4i32);
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
static const int MaskHi[] = { 1, 1, 3, 3 };
SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
return DAG.getBitcast(VT, Result);
}
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
SDValue SB = DAG.getConstant(FlipSigns ? 0x8000000080000000ULL
: 0x0000000080000000ULL,
dl, MVT::v2i64);
Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB);
// Cast everything to the right type.
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
// Create masks for only the low parts/high parts of the 64 bit integers.
static const int MaskHi[] = { 1, 1, 3, 3 };
static const int MaskLo[] = { 0, 0, 2, 2 };
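// MaskHi ({1,1,3,3}) broadcasts each lane's high dword, MaskLo ({0,0,2,2})
// its low dword, so the per-lane results line up for the AND/OR below.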
SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getBitcast(VT, Result);
}
if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) {
// If pcmpeqq is missing but pcmpeqd is available, synthesize pcmpeqq with
// pcmpeqd + pshufd + pand.
assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Do the compare.
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
// Make sure the lower and upper halves are both all-ones.
static const int Mask[] = { 1, 0, 3, 2 };
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getBitcast(VT, Result);
}
}
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
// If the logical-not of the result is required, perform that now.
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
return Result;
}
// Try to select this as a KORTEST+SETCC or KTEST+SETCC if possible.
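// KORTEST sets ZF when the OR of the two masks is zero and CF when it is all
// ones, so vXi1 equality tests against 0 or all-ones map directly onto the
// E/NE and B/AE condition codes used below.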
static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDValue &X86CC) {
// Only support equality comparisons.
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return SDValue();
// Must be a bitcast from vXi1.
if (Op0.getOpcode() != ISD::BITCAST)
return SDValue();
Op0 = Op0.getOperand(0);
MVT VT = Op0.getSimpleValueType();
if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) &&
!(Subtarget.hasDQI() && VT == MVT::v8i1) &&
!(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
return SDValue();
X86::CondCode X86Cond;
if (isNullConstant(Op1)) {
X86Cond = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
} else if (isAllOnesConstant(Op1)) {
// C flag is set for all ones.
X86Cond = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
} else
return SDValue();
// If the input is an AND, we can combine its operands into the KTEST.
bool KTestable = false;
if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1))
KTestable = true;
if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))
KTestable = true;
if (!isNullConstant(Op1))
KTestable = false;
if (KTestable && Op0.getOpcode() == ISD::AND && Op0.hasOneUse()) {
SDValue LHS = Op0.getOperand(0);
SDValue RHS = Op0.getOperand(1);
X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
return DAG.getNode(X86ISD::KTEST, dl, MVT::i32, LHS, RHS);
}
// If the input is an OR, we can combine its operands into the KORTEST.
SDValue LHS = Op0;
SDValue RHS = Op0;
if (Op0.getOpcode() == ISD::OR && Op0.hasOneUse()) {
LHS = Op0.getOperand(0);
RHS = Op0.getOperand(1);
}
X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
}
/// Emit flags for the given setcc condition and operands. Also returns the
/// corresponding X86 condition code constant in X86CC.
SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
ISD::CondCode CC, const SDLoc &dl,
SelectionDAG &DAG,
SDValue &X86CC) const {
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
X86::CondCode X86CondCode;
if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) {
X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
return BT;
}
}
// Try to use PTEST/PMOVMSKB for a tree of ORs equality-compared with 0.
// TODO: We could do AND tree with all 1s as well by using the C flag.
if (isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE))
if (SDValue CmpZ =
MatchVectorAllZeroTest(Op0, CC, dl, Subtarget, DAG, X86CC))
return CmpZ;
// Try to lower using KORTEST or KTEST.
if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
return Test;
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
X86CC = Op0.getOperand(0);
if (Invert) {
X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
}
return Op0.getOperand(1);
}
}
// Try to use the carry flag from the add in place of a separate CMP for:
// (seteq (add X, -1), -1). Similar for setne.
if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD &&
Op0.getOperand(1) == Op1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (isProfitableToUseFlagOp(Op0)) {
SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
SDValue New = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(0),
Op0.getOperand(1));
DAG.ReplaceAllUsesOfValueWith(SDValue(Op0.getNode(), 0), New);
X86::CondCode CCode = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
return SDValue(New.getNode(), 1);
}
}
X86::CondCode CondCode =
TranslateX86CC(CC, dl, /*IsFP*/ false, Op0, Op1, DAG);
assert(CondCode != X86::COND_INVALID && "Unexpected condition code!");
SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget);
X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
return EFLAGS;
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
Op.getOpcode() == ISD::STRICT_FSETCCS;
MVT VT = Op->getSimpleValueType(0);
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
SDLoc dl(Op);
ISD::CondCode CC =
cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
if (isSoftFP16(Op0.getValueType()))
return SDValue();
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets handled by emitFlagsForSetcc.
if (Op0.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1, Chain,
Op.getOpcode() == ISD::STRICT_FSETCCS);
// If softenSetCCOperands returned a scalar, use it.
if (!Op1.getNode()) {
assert(Op0.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
if (IsStrict)
return DAG.getMergeValues({Op0, Chain}, dl);
return Op0;
}
}
if (Op0.getSimpleValueType().isInteger()) {
// Attempt to canonicalize SGT/UGT -> SGE/UGE compares with a constant, which
// reduces the number of EFLAGS bit reads (the GE conditions don't read ZF);
// this may translate to fewer uops depending on the uarch implementation. The
// equivalent for SLE/ULE -> SLT/ULT isn't likely to happen as we already
// canonicalize to that CondCode.
// NOTE: Only do this if incrementing the constant doesn't increase the bit
// encoding size - so it must either already be an i8 or i32 immediate, or it
// shrinks down to that. We don't do this for any i64's to avoid additional
// constant materializations.
// TODO: Can we move this to TranslateX86CC to handle jumps/branches too?
if (auto *Op1C = dyn_cast<ConstantSDNode>(Op1)) {
const APInt &Op1Val = Op1C->getAPIntValue();
if (!Op1Val.isZero()) {
// Ensure the constant+1 doesn't overflow.
if ((CC == ISD::CondCode::SETGT && !Op1Val.isMaxSignedValue()) ||
(CC == ISD::CondCode::SETUGT && !Op1Val.isMaxValue())) {
APInt Op1ValPlusOne = Op1Val + 1;
if (Op1ValPlusOne.isSignedIntN(32) &&
(!Op1Val.isSignedIntN(8) || Op1ValPlusOne.isSignedIntN(8))) {
Op1 = DAG.getConstant(Op1ValPlusOne, dl, Op0.getValueType());
CC = CC == ISD::CondCode::SETGT ? ISD::CondCode::SETGE
: ISD::CondCode::SETUGE;
}
}
}
}
SDValue X86CC;
SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
// Handle floating point.
X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG);
if (CondCode == X86::COND_INVALID)
return SDValue();
SDValue EFLAGS;
if (IsStrict) {
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
EFLAGS =
DAG.getNode(IsSignaling ? X86ISD::STRICT_FCMPS : X86ISD::STRICT_FCMP,
dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
Chain = EFLAGS.getValue(1);
} else {
EFLAGS = DAG.getNode(X86ISD::FCMP, dl, MVT::i32, Op0, Op1);
}
SDValue X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
SDValue Cond = Op.getOperand(3);
SDLoc DL(Op);
assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());
// Recreate the carry if needed.
EVT CarryVT = Carry.getValueType();
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getAllOnesConstant(DL, CarryVT));
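// Adding all-ones (-1) to the boolean carry value sets CF exactly when the
// carry was nonzero, rematerializing it in EFLAGS for the SBB below.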
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
return getSETCC(CC, Cmp.getValue(1), DL, DAG);
}
// This function returns three things: the arithmetic computation itself
// (Value), an EFLAGS result (Overflow), and a condition code (Cond). The
// flag and the condition code define the case in which the arithmetic
// computation overflows.
static std::pair<SDValue, SDValue>
getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
assert(Op.getResNo() == 0 && "Unexpected result number!");
SDValue Value, Overflow;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
unsigned BaseOp = 0;
SDLoc DL(Op);
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
case ISD::UADDO:
BaseOp = X86ISD::ADD;
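// An unsigned "X + 1" overflows iff the result wraps to zero, so check ZF
// in that case; this keeps the check usable if the add is selected as INC,
// which doesn't update CF.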
Cond = isOneConstant(RHS) ? X86::COND_E : X86::COND_B;
break;
case ISD::SSUBO:
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
case ISD::USUBO:
BaseOp = X86ISD::SUB;
Cond = X86::COND_B;
break;
case ISD::SMULO:
BaseOp = X86ISD::SMUL;
Cond = X86::COND_O;
break;
case ISD::UMULO:
BaseOp = X86ISD::UMUL;
Cond = X86::COND_O;
break;
}
if (BaseOp) {
// Also sets EFLAGS.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
Value = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
Overflow = Value.getValue(1);
}
return std::make_pair(Value, Overflow);
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
// looks for this combo and may remove the "setcc" instruction if the "setcc"
// has only one use.
SDLoc DL(Op);
X86::CondCode Cond;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG);
SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
assert(Op->getValueType(1) == MVT::i8 && "Unexpected VT!");
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);
}
/// Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
Opc == X86ISD::FCMP)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
return false;
}
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
return false;
SDValue VOp0 = V.getOperand(0);
unsigned InBits = VOp0.getValueSizeInBits();
unsigned Bits = V.getValueSizeInBits();
return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
}
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool AddTest = true;
SDValue Cond = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
MVT VT = Op1.getSimpleValueType();
SDValue CC;
if (isSoftFP16(VT)) {
MVT NVT = VT.changeTypeToInteger();
return DAG.getBitcast(VT, DAG.getNode(ISD::SELECT, DL, NVT, Cond,
DAG.getBitcast(NVT, Op1),
DAG.getBitcast(NVT, Op2)));
}
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available or VBLENDV if AVX is available.
// Otherwise FP cmovs get lowered into a less efficient branch sequence later.
if (Cond.getOpcode() == ISD::SETCC && isScalarFPTypeInSSEReg(VT) &&
VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
bool IsAlwaysSignaling;
unsigned SSECC =
translateX86FSETCC(cast<CondCodeSDNode>(Cond.getOperand(2))->get(),
CondOp0, CondOp1, IsAlwaysSignaling);
if (Subtarget.hasAVX512()) {
SDValue Cmp =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1,
DAG.getTargetConstant(SSECC, DL, MVT::i8));
assert(!VT.isVector() && "Not a scalar type?");
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
if (SSECC < 8 || Subtarget.hasAVX()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getTargetConstant(SSECC, DL, MVT::i8));
// If we have AVX, we can use a variable vector select (VBLENDV) instead
// of 3 logic instructions for size savings and potentially speed.
// Unfortunately, there is no scalar form of VBLENDV.
// If either operand is a +0.0 constant, don't try this. We can expect to
// optimize away at least one of the logic instructions later in that
// case, so that sequence would be faster than a variable blend.
// BLENDV was introduced with SSE 4.1, but the 2 register form implicitly
// uses XMM0 as the selection register. That may need just as many
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
if (Subtarget.hasAVX() && !isNullFPConstant(Op1) &&
!isNullFPConstant(Op2)) {
// Convert to vectors, do a VSELECT, and convert back to scalar.
// All of the conversions should be optimized away.
MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
VCmp = DAG.getBitcast(VCmpVT, VCmp);
SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
VSel, DAG.getIntPtrConstant(0, DL));
}
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
}
}
// AVX512 fallback is to lower selects of scalar floats to masked moves.
if (isScalarFPTypeInSSEReg(VT) && Subtarget.hasAVX512()) {
SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond);
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
if (Cond.getOpcode() == ISD::SETCC &&
!isSoftFP16(Cond.getOperand(0).getSimpleValueType())) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
// If the condition was updated, it's possible that the operands of the
// select were also updated (for example, EmitTest has a RAUW). Refresh
// the local references to the select operands in case they got stale.
Op1 = Op.getOperand(1);
Op2 = Op.getOperand(2);
}
}
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
// (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y
// (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y
// (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x
// (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
SDValue CmpOp0 = Cmp.getOperand(0);
unsigned CondCode = Cond.getConstantOperandVal(0);
// Special handling for the __builtin_ffs(X) - 1 pattern, which looks like
// (select (seteq X, 0), -1, (cttz_zero_undef X)). Disable this special
// handling here and keep the CMP with 0; it should later be removed by
// optimizeCompareInst using the flags from the BSR/TZCNT emitted for the
// cttz_zero_undef.
auto MatchFFSMinus1 = [&](SDValue Op1, SDValue Op2) {
return (Op1.getOpcode() == ISD::CTTZ_ZERO_UNDEF && Op1.hasOneUse() &&
Op1.getOperand(0) == CmpOp0 && isAllOnesConstant(Op2));
};
if (Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64) &&
((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) ||
(CondCode == X86::COND_E && MatchFFSMinus1(Op2, Op1)))) {
// Keep Cmp.
} else if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
// 'X - 1' sets the carry flag if X == 0.
// '0 - X' sets the carry flag if X != 0.
// Convert the carry flag to a -1/0 mask with sbb:
// select (X != 0), -1, Y --> 0 - X; or (sbb), Y
// select (X == 0), Y, -1 --> 0 - X; or (sbb), Y
// select (X != 0), Y, -1 --> X - 1; or (sbb), Y
// select (X == 0), -1, Y --> X - 1; or (sbb), Y
SDValue Sub;
if (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE)) {
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
Sub = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0);
} else {
SDValue One = DAG.getConstant(1, DL, CmpOp0.getValueType());
Sub = DAG.getNode(X86ISD::SUB, DL, CmpVTs, CmpOp0, One);
}
SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
Sub.getValue(1));
return DAG.getNode(ISD::OR, DL, VT, SBB, Y);
} else if (!Subtarget.canUseCMOV() && CondCode == X86::COND_E &&
CmpOp0.getOpcode() == ISD::AND &&
isOneConstant(CmpOp0.getOperand(1))) {
SDValue Src1, Src2;
// Returns true if Op2 is an XOR or OR operator and one of its operands
// equals Op1:
//   (a, a op b) || (b, a op b)
auto isOrXorPattern = [&]() {
if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) &&
(Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) {
Src1 =
Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0);
Src2 = Op1;
return true;
}
return false;
};
if (isOrXorPattern()) {
SDValue Neg;
unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits();
// We need a mask of all zeros or all ones with the same size as the other
// operands.
if (CmpSz > VT.getSizeInBits())
Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0);
else if (CmpSz < VT.getSizeInBits())
Neg = DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)),
DAG.getConstant(1, DL, VT));
else
Neg = CmpOp0;
SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Neg); // -(and (x, 0x1))
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // And Op y
}
} else if ((VT == MVT::i32 || VT == MVT::i64) && isNullConstant(Op2) &&
Cmp.getNode()->hasOneUse() && (CmpOp0 == Op1) &&
((CondCode == X86::COND_S) || // smin(x, 0)
(CondCode == X86::COND_G && hasAndNot(Op1)))) { // smax(x, 0)
// (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x
//
// If the comparison is testing for a positive value, we have to invert
// the sign bit mask, so only do that transform if the target has a
// bitwise 'and not' instruction (the invert is free).
// (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x
unsigned ShCt = VT.getSizeInBits() - 1;
SDValue ShiftAmt = DAG.getConstant(ShCt, DL, VT);
SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, Op1, ShiftAmt);
if (CondCode == X86::COND_G)
Shift = DAG.getNOT(DL, Shift, VT);
return DAG.getNode(ISD::AND, DL, VT, Shift, Op1);
}
}
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
isOneConstant(Cond.getOperand(1)))
Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT) && Subtarget.canUseCMOV()) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
Cmp.getOpcode() == X86ISD::BT) { // FIXME
Cond = Cmp;
AddTest = false;
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
SDValue Value;
X86::CondCode X86Cond;
std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
CC = DAG.getTargetConstant(X86Cond, DL, MVT::i8);
AddTest = false;
}
if (AddTest) {
// Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
X86::CondCode X86CondCode;
if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, X86CondCode)) {
CC = DAG.getTargetConstant(X86CondCode, DL, MVT::i8);
Cond = BT;
AddTest = false;
}
}
}
if (AddTest) {
CC = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DL, DAG, Subtarget);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
// a < b ? 0 : -1 -> RES = setcc_carry
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
if (Cond.getOpcode() == X86ISD::SUB) {
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
(isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(isNullConstant(Op1) || isNullConstant(Op2))) {
SDValue Res =
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cond);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
}
}
// X86 doesn't have an i8 cmov. If both operands are the result of a truncate,
// widen the cmov and push the truncate through. This avoids introducing a new
// branch during isel and doesn't add any extensions.
if (Op.getValueType() == MVT::i8 &&
Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
// Exclude CopyFromReg to avoid partial register stalls.
T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode() != ISD::CopyFromReg) {
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1,
CC, Cond);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
}
// Or finally, promote i8 cmovs if we have CMOV,
// or i16 cmovs if it won't prevent folding a load.
// FIXME: we should not limit promotion of the i8 case to only when the CMOV is
// legal, but EmitLoweredSelect() cannot deal with these extensions
// being inserted between two CMOVs. (in the i16 case too TBN)
// https://bugs.llvm.org/show_bug.cgi?id=40974
if ((Op.getValueType() == MVT::i8 && Subtarget.canUseCMOV()) ||
(Op.getValueType() == MVT::i16 && !X86::mayFoldLoad(Op1, Subtarget) &&
!X86::mayFoldLoad(Op2, Subtarget))) {
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
SDValue Ops[] = { Op2, Op1, CC, Cond };
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDValue Ops[] = { Op2, Op1, CC, Cond };
return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops);
}
static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!");
MVT VTElt = VT.getVectorElementType();
SDLoc dl(Op);
unsigned NumElts = VT.getVectorNumElements();
// Extend VT if the scalar type is i8/i16 and BWI is not supported.
MVT ExtVT = VT;
if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) {
// If v16i32 is to be avoided, we'll need to split and concatenate.
if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG);
ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
}
// Widen to 512-bits if VLX is not supported.
MVT WideVT = ExtVT;
if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) {
NumElts *= 512 / ExtVT.getSizeInBits();
InVT = MVT::getVectorVT(MVT::i1, NumElts);
In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT),
In, DAG.getIntPtrConstant(0, dl));
WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts);
}
SDValue V;
MVT WideEltVT = WideVT.getVectorElementType();
if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) ||
(Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) {
V = DAG.getNode(Op.getOpcode(), dl, WideVT, In);
} else {
SDValue NegOne = DAG.getConstant(-1, dl, WideVT);
SDValue Zero = DAG.getConstant(0, dl, WideVT);
V = DAG.getSelect(dl, WideVT, In, NegOne, Zero);
}
// Truncate if we had to extend i16/i8 above.
if (VT != ExtVT) {
WideVT = MVT::getVectorVT(VTElt, NumElts);
V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V);
}
// Extract back to 128/256-bit if we widened.
if (WideVT != VT)
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, V,
DAG.getIntPtrConstant(0, dl));
return V;
}
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
if (InVT.getVectorElementType() == MVT::i1)
return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);
assert(Subtarget.hasAVX() && "Expected AVX support");
return LowerAVXExtend(Op, DAG, Subtarget);
}
// Lowering for SIGN_EXTEND_VECTOR_INREG and ZERO_EXTEND_VECTOR_INREG.
// For sign extend this needs to handle all vector sizes and SSE4.1 and
// non-SSE4.1 targets. For zero extend this should only handle inputs of
// MVT::v64i8 when BWI is not supported, but AVX512 is.
static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = Op->getOperand(0);
MVT VT = Op->getSimpleValueType(0);
MVT InVT = In.getSimpleValueType();
MVT SVT = VT.getVectorElementType();
MVT InSVT = InVT.getVectorElementType();
assert(SVT.getFixedSizeInBits() > InSVT.getFixedSizeInBits());
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
!(VT.is256BitVector() && Subtarget.hasAVX()) &&
!(VT.is512BitVector() && Subtarget.hasAVX512()))
return SDValue();
SDLoc dl(Op);
unsigned Opc = Op.getOpcode();
unsigned NumElts = VT.getVectorNumElements();
// For 256-bit vectors, we only need the lower (128-bit) half of the input.
// For 512-bit vectors, we need 128-bits or 256-bits.
if (InVT.getSizeInBits() > 128) {
// Input needs to be at least the same number of elements as output, and
// at least 128-bits.
int InSize = InSVT.getSizeInBits() * NumElts;
In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
InVT = In.getSimpleValueType();
}
// SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results,
// so those are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions
// still need to be handled here for 256/512-bit results.
if (Subtarget.hasInt256()) {
assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
if (InVT.getVectorNumElements() != NumElts)
return DAG.getNode(Op.getOpcode(), dl, VT, In);
// FIXME: Apparently we create inreg operations that could be regular
// extends.
unsigned ExtOpc =
Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, VT, In);
}
// pre-AVX2 256-bit extensions need to be split into 128-bit instructions.
if (Subtarget.hasAVX()) {
assert(VT.is256BitVector() && "256-bit vector expected");
MVT HalfVT = VT.getHalfNumVectorElementsVT();
int HalfNumElts = HalfVT.getVectorNumElements();
unsigned NumSrcElts = InVT.getVectorNumElements();
SmallVector<int, 16> HiMask(NumSrcElts, SM_SentinelUndef);
for (int i = 0; i != HalfNumElts; ++i)
HiMask[i] = HalfNumElts + i;
SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In);
SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask);
Hi = DAG.getNode(Opc, dl, HalfVT, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
}
// We should only get here for sign extend.
assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");
// pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
SDValue Curr = In;
SDValue SignExt = Curr;
// As SRAI is only available on i16/i32 types, we expand only up to i32
// and handle i64 separately.
if (InVT != MVT::v4i32) {
MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
unsigned DestWidth = DestVT.getScalarSizeInBits();
unsigned Scale = DestWidth / InSVT.getSizeInBits();
unsigned InNumElts = InVT.getVectorNumElements();
unsigned DestElts = DestVT.getVectorNumElements();
// Build a shuffle mask that takes each input element and places it in the
// MSBs of the new element size.
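// E.g. for v16i8 -> v4i32, Scale is 4 and the mask becomes
// {-1,-1,-1,0, -1,-1,-1,1, -1,-1,-1,2, -1,-1,-1,3}, so each input byte lands
// in the top byte of its i32 lane before the arithmetic shift below.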
SmallVector<int, 16> Mask(InNumElts, SM_SentinelUndef);
for (unsigned i = 0; i != DestElts; ++i)
Mask[i * Scale + (Scale - 1)] = i;
Curr = DAG.getVectorShuffle(InVT, dl, In, In, Mask);
Curr = DAG.getBitcast(DestVT, Curr);
unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
DAG.getTargetConstant(SignExtShift, dl, MVT::i8));
}
if (VT == MVT::v2i64) {
assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");
SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
SignExt = DAG.getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
SignExt = DAG.getBitcast(VT, SignExt);
}
return SignExt;
}
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
if (InVT.getVectorElementType() == MVT::i1)
return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);
assert(VT.isVector() && InVT.isVector() && "Expected vector type");
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Expected same number of elements");
assert((VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::i64) &&
"Unexpected element type");
assert((InVT.getVectorElementType() == MVT::i8 ||
InVT.getVectorElementType() == MVT::i16 ||
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
assert(InVT == MVT::v32i8 && "Unexpected VT!");
return splitVectorIntUnary(Op, DAG);
}
if (Subtarget.hasInt256())
return Op;
// Optimize vectors in AVX mode:
// sign extend v8i16 to v8i32 and v4i32 to v4i64.
//
// Divide the input vector into two parts;
// for v4i32 the high shuffle mask will be {2, 3, -1, -1}.
// Use the vpmovsx instruction to extend v4i32 -> v2i64 and v8i16 -> v4i32,
// then concat the vectors back to the original VT.
MVT HalfVT = VT.getHalfNumVectorElementsVT();
SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);
unsigned NumElems = InVT.getVectorNumElements();
SmallVector<int,8> ShufMask(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask[i] = i + NumElems/2;
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
/// Change a vector store into a pair of half-size vector stores.
static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
SDValue StoredVal = Store->getValue();
assert((StoredVal.getValueType().is256BitVector() ||
StoredVal.getValueType().is512BitVector()) &&
"Expecting 256/512-bit op");
// Splitting volatile memory ops is not allowed unless the operation was not
// legal to begin with. Assume the input store is legal (this transform is
// only used for targets with AVX). Note: It is possible that we have an
// illegal type like v2i128, and so we could allow splitting a volatile store
// in that case if that is important.
if (!Store->isSimple())
return SDValue();
SDLoc DL(Store);
SDValue Value0, Value1;
std::tie(Value0, Value1) = splitVector(StoredVal, DAG, DL);
unsigned HalfOffset = Value0.getValueType().getStoreSize();
SDValue Ptr0 = Store->getBasePtr();
SDValue Ptr1 =
DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(HalfOffset), DL);
SDValue Ch0 =
DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
Store->getOriginalAlign(),
Store->getMemOperand()->getFlags());
SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
Store->getPointerInfo().getWithOffset(HalfOffset),
Store->getOriginalAlign(),
Store->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
}
/// Scalarize a vector store, bitcasting to TargetVT to determine the scalar
/// type.
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
SelectionDAG &DAG) {
SDValue StoredVal = Store->getValue();
assert(StoreVT.is128BitVector() &&
StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op");
StoredVal = DAG.getBitcast(StoreVT, StoredVal);
// Splitting volatile memory ops is not allowed unless the operation was not
// legal to begin with. We are assuming the input op is legal (this transform
// is only used for targets with AVX).
if (!Store->isSimple())
return SDValue();
MVT StoreSVT = StoreVT.getScalarType();
unsigned NumElems = StoreVT.getVectorNumElements();
unsigned ScalarSize = StoreSVT.getStoreSize();
SDLoc DL(Store);
SmallVector<SDValue, 4> Stores;
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Offset = i * ScalarSize;
SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(),
TypeSize::Fixed(Offset), DL);
SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
DAG.getIntPtrConstant(i, DL));
SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
Store->getPointerInfo().getWithOffset(Offset),
Store->getOriginalAlign(),
Store->getMemOperand()->getFlags());
Stores.push_back(Ch);
}
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
}
static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
SDLoc dl(St);
SDValue StoredVal = St->getValue();
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
if (StoredVal.getValueType().isVector() &&
StoredVal.getValueType().getVectorElementType() == MVT::i1) {
unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
assert(NumElts <= 8 && "Unexpected VT");
assert(!St->isTruncatingStore() && "Expected non-truncating store");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
"Expected AVX512F without AVX512DQI");
// We must pad with zeros to ensure we store zeroes to any unused bits.
StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
DAG.getUNDEF(MVT::v16i1), StoredVal,
DAG.getIntPtrConstant(0, dl));
StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
// Make sure we store zeros in the extra bits.
if (NumElts < 8)
StoredVal = DAG.getZeroExtendInReg(
StoredVal, dl, EVT::getIntegerVT(*DAG.getContext(), NumElts));
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
if (St->isTruncatingStore())
return SDValue();
// If this is a 256-bit store of concatenated ops, we are better off splitting
// that store into two 128-bit stores. This avoids spurious use of 256-bit ops
// and each half can execute independently. Some cores would split the op into
// halves anyway, so the concat (vinsertf128) is purely an extra op.
MVT StoreVT = StoredVal.getSimpleValueType();
if (StoreVT.is256BitVector() ||
((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
!Subtarget.hasBWI())) {
SmallVector<SDValue, 4> CatOps;
if (StoredVal.hasOneUse() &&
collectConcatOps(StoredVal.getNode(), CatOps, DAG))
return splitVectorStore(St, DAG);
return SDValue();
}
if (StoreVT.is32BitVector())
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert(StoreVT.is64BitVector() && "Unexpected VT");
assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
TargetLowering::TypeWidenVector &&
"Unexpected type action!");
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT);
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
DAG.getUNDEF(StoreVT));
if (Subtarget.hasSSE2()) {
// Widen the vector, cast to a v2x64 type, extract the single 64-bit element
// and store it.
MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
MVT CastVT = MVT::getVectorVT(StVT, 2);
StoredVal = DAG.getBitcast(CastVT, StoredVal);
StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
DAG.getIntPtrConstant(0, dl));
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
assert(Subtarget.hasSSE1() && "Expected SSE");
SDVTList Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64,
St->getMemOperand());
}
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
// may emit an illegal shuffle but the expansion is still better than scalar
// code. We generate sext/sext_invec for SEXTLOADs if it's available, otherwise
// we'll emit a shuffle and an arithmetic shift.
// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
// the shuffle phase or after the shuffle.
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
assert(RegVT.isVector() && "We only custom lower vector loads.");
assert(RegVT.isInteger() &&
"We only custom lower integer vector loads.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
if (RegVT.getVectorElementType() == MVT::i1) {
assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load");
assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
"Expected AVX512F without AVX512DQI");
SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
// Replace chain users with the new chain.
assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");
SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
DAG.getBitcast(MVT::v16i1, Val),
DAG.getIntPtrConstant(0, dl));
return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
}
return SDValue();
}
/// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
/// each of which has no other use apart from the AND / OR.
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
Opc = Op.getOpcode();
if (Opc != ISD::OR && Opc != ISD::AND)
return false;
return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
Op.getOperand(0).hasOneUse() &&
Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
Op.getOperand(1).hasOneUse());
}
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
// Bail out when we don't have native compare instructions.
if (Cond.getOpcode() == ISD::SETCC &&
Cond.getOperand(0).getValueType() != MVT::f128 &&
!isSoftFP16(Cond.getOperand(0).getValueType())) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Special case for
// setcc([su]{add,sub,mul}o == 0)
// setcc([su]{add,sub,mul}o != 1)
if (ISD::isOverflowIntrOpRes(LHS) &&
(CC == ISD::SETEQ || CC == ISD::SETNE) &&
(isNullConstant(RHS) || isOneConstant(RHS))) {
SDValue Value, Overflow;
X86::CondCode X86Cond;
std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, LHS.getValue(0), DAG);
if ((CC == ISD::SETEQ) == isNullConstant(RHS))
X86Cond = X86::GetOppositeBranchCondition(X86Cond);
SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Overflow);
}
if (LHS.getSimpleValueType().isInteger()) {
SDValue CCVal;
SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, CC, SDLoc(Cond), DAG, CCVal);
return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
EFLAGS);
}
if (CC == ISD::SETOEQ) {
// For FCMP_OEQ, we can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_OEQ.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
SDValue Cmp =
DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest,
CCVal, Cmp);
CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
}
} else if (CC == ISD::SETUNE) {
// For FCMP_UNE, we can emit
// two branches instead of an explicit OR instruction with a
// separate test.
SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
Chain =
DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Cmp);
CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
} else {
X86::CondCode X86Cond =
TranslateX86CC(CC, dl, /*IsFP*/ true, LHS, RHS, DAG);
SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
}
if (ISD::isOverflowIntrOpRes(Cond)) {
SDValue Value, Overflow;
X86::CondCode X86Cond;
std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Overflow);
}
// Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
EVT CondVT = Cond.getValueType();
// Add an AND with 1 if we don't already have one.
if (!(Cond.getOpcode() == ISD::AND && isOneConstant(Cond.getOperand(1))))
Cond =
DAG.getNode(ISD::AND, dl, CondVT, Cond, DAG.getConstant(1, dl, CondVT));
SDValue LHS = Cond;
SDValue RHS = DAG.getConstant(0, dl, CondVT);
SDValue CCVal;
SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, ISD::SETNE, dl, DAG, CCVal);
return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
EFLAGS);
}
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
bool EmitStackProbeCall = hasStackProbeSymbol(MF);
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
SplitStack || EmitStackProbeCall;
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
MaybeAlign Alignment(Op.getConstantOperandVal(2));
EVT VT = Node->getValueType(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
bool Is64Bit = Subtarget.is64Bit();
MVT SPTy = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (!Lower) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Align StackAlign = TFI.getStackAlign();
if (hasInlineStackProbe(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
Register Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
} else {
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
}
if (Alignment && *Alignment > StackAlign)
Result =
DAG.getNode(ISD::AND, dl, VT, Result,
DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
} else if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Is64Bit) {
// The 64-bit implementation of segmented stacks needs to clobber both r10
// and r11. This makes it impossible to use it along with nested parameters.
const Function &F = MF.getFunction();
for (const auto &A : F.args()) {
if (A.hasNestAttr())
report_fatal_error("Cannot use segmented stacks with functions that "
"have nested arguments.");
}
}
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
Register Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
} else {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::DYN_ALLOCA, dl, NodeTys, Chain, Size);
MF.getInfo<X86MachineFunctionInfo>()->setHasDynAlloca(true);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
Register SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
if (Alignment) {
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
Result = SP;
}
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
SDValue Ops[2] = {Result, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDLoc DL(Op);
if (!Subtarget.is64Bit() ||
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv())) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
// __va_list_tag:
// gp_offset (0 - 6 * 8)
// fp_offset (48 - 48 + 8 * 16)
// overflow_arg_area (point to parameters coming in memory).
// reg_save_area
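//
// As laid out by the stores below, the field offsets are 0 (gp_offset),
// 4 (fp_offset), 8 (overflow_arg_area) and 16 (reg_save_area) on LP64;
// on X32 the 4-byte pointers place reg_save_area at offset 12 instead.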
SmallVector<SDValue, 8> MemOps;
SDValue FIN = Op.getOperand(1);
// Store gp_offset
SDValue Store = DAG.getStore(
Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN,
MachinePointerInfo(SV));
MemOps.push_back(Store);
// Store fp_offset
FIN = DAG.getMemBasePlusOffset(FIN, TypeSize::Fixed(4), DL);
Store = DAG.getStore(
Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
MachinePointerInfo(SV, 4));
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
Store =
DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8));
MemOps.push_back(Store);
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
Store = DAG.getStore(
Op.getOperand(0), DL, RSFIN, FIN,
MachinePointerInfo(SV, Subtarget.isTarget64BitLP64() ? 16 : 12));
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget.is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
assert(Op.getNumOperands() == 4);
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()))
// The Win64 ABI uses char* instead of a structure.
return DAG.expandVAArg(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue SrcPtr = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
unsigned Align = Op.getConstantOperandVal(3);
SDLoc dl(Op);
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
// TODO: Implement the AMD64 ABI in its entirety. This simple
// selection mechanism works only for the basic types.
assert(ArgVT != MVT::f80 && "va_arg for f80 not yet implemented");
if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
} else {
assert(ArgVT.isInteger() && ArgSize <= 32 /*bytes*/ &&
"Unhandled argument type in LowerVAARG");
ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
}
if (ArgMode == 2) {
// Make sure using fp_offset makes sense.
assert(!Subtarget.useSoftFloat() &&
!(MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) &&
Subtarget.hasSSE1());
}
// Insert VAARG node into the DAG
// VAARG returns two values: Variable Argument Address, Chain
SDValue InstOps[] = {Chain, SrcPtr,
DAG.getTargetConstant(ArgSize, dl, MVT::i32),
DAG.getTargetConstant(ArgMode, dl, MVT::i8),
DAG.getTargetConstant(Align, dl, MVT::i32)};
SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(
Subtarget.isTarget64BitLP64() ? X86ISD::VAARG_64 : X86ISD::VAARG_X32, dl,
VTs, InstOps, MVT::i64, MachinePointerInfo(SV),
/*Alignment=*/std::nullopt,
MachineMemOperand::MOLoad | MachineMemOperand::MOStore);
Chain = VAARG.getValue(1);
// Load the next argument and return it
return DAG.getLoad(ArgVT, dl, Chain, VAARG, MachinePointerInfo());
}
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
// where a va_list is still an i8*.
assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
if (Subtarget.isCallingConvWin64(
DAG.getMachineFunction().getFunction().getCallingConv()))
// Probably a Win64 va_copy.
return DAG.expandVACopy(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue DstPtr = Op.getOperand(1);
SDValue SrcPtr = Op.getOperand(2);
const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
return DAG.getMemcpy(
Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 24 : 16, DL),
Align(Subtarget.isTarget64BitLP64() ? 8 : 4), /*isVolatile*/ false, false,
false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
// Helper to get immediate/variable SSE shift opcode from other shift opcodes.
static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable) {
switch (Opc) {
case ISD::SHL:
case X86ISD::VSHL:
case X86ISD::VSHLI:
return IsVariable ? X86ISD::VSHL : X86ISD::VSHLI;
case ISD::SRL:
case X86ISD::VSRL:
case X86ISD::VSRLI:
return IsVariable ? X86ISD::VSRL : X86ISD::VSRLI;
case ISD::SRA:
case X86ISD::VSRA:
case X86ISD::VSRAI:
return IsVariable ? X86ISD::VSRA : X86ISD::VSRAI;
}
llvm_unreachable("Unknown target vector shift node");
}
/// Handle vector element shifts where the shift amount is a constant.
/// Takes immediate version of shift as input.
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, uint64_t ShiftAmt,
SelectionDAG &DAG) {
MVT ElementType = VT.getVectorElementType();
// Bitcast the source vector to the output type; this is mainly necessary for
// vXi8/vXi64 shifts.
if (VT != SrcOp.getSimpleValueType())
SrcOp = DAG.getBitcast(VT, SrcOp);
// Fold this packed shift into its first operand if ShiftAmt is 0.
if (ShiftAmt == 0)
return SrcOp;
// Check for ShiftAmt >= element width
if (ShiftAmt >= ElementType.getSizeInBits()) {
if (Opc == X86ISD::VSRAI)
ShiftAmt = ElementType.getSizeInBits() - 1;
else
return DAG.getConstant(0, dl, VT);
}
assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
&& "Unknown target vector shift-by-constant node");
// Fold this packed vector shift into a build vector if SrcOp is a
// vector of Constants or UNDEFs.
if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
unsigned ShiftOpc;
switch (Opc) {
default: llvm_unreachable("Unknown opcode!");
case X86ISD::VSHLI:
ShiftOpc = ISD::SHL;
break;
case X86ISD::VSRLI:
ShiftOpc = ISD::SRL;
break;
case X86ISD::VSRAI:
ShiftOpc = ISD::SRA;
break;
}
SDValue Amt = DAG.getConstant(ShiftAmt, dl, VT);
if (SDValue C = DAG.FoldConstantArithmetic(ShiftOpc, dl, VT, {SrcOp, Amt}))
return C;
}
return DAG.getNode(Opc, dl, VT, SrcOp,
DAG.getTargetConstant(ShiftAmt, dl, MVT::i8));
}
/// Handle vector element shifts by a splat shift amount
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt, int ShAmtIdx,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT AmtVT = ShAmt.getSimpleValueType();
assert(AmtVT.isVector() && "Vector shift type mismatch");
assert(0 <= ShAmtIdx && ShAmtIdx < (int)AmtVT.getVectorNumElements() &&
"Illegal vector splat index");
// Move the splat element to the bottom element.
if (ShAmtIdx != 0) {
SmallVector<int> Mask(AmtVT.getVectorNumElements(), -1);
Mask[0] = ShAmtIdx;
ShAmt = DAG.getVectorShuffle(AmtVT, dl, ShAmt, DAG.getUNDEF(AmtVT), Mask);
}
// Peek through any zext node if we can get back to a 128-bit source.
if (AmtVT.getScalarSizeInBits() == 64 &&
(ShAmt.getOpcode() == ISD::ZERO_EXTEND ||
ShAmt.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
ShAmt.getOperand(0).getValueType().isSimple() &&
ShAmt.getOperand(0).getValueType().is128BitVector()) {
ShAmt = ShAmt.getOperand(0);
AmtVT = ShAmt.getSimpleValueType();
}
// See if we can mask off the upper elements using the existing source node.
// The shift uses the entire lower 64-bits of the amount vector, so no need to
// do this for vXi64 types.
bool IsMasked = false;
if (AmtVT.getScalarSizeInBits() < 64) {
if (ShAmt.getOpcode() == ISD::BUILD_VECTOR ||
ShAmt.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// If the shift amount has come from a scalar, then zero-extend the scalar
// before moving to the vector.
ShAmt = DAG.getZExtOrTrunc(ShAmt.getOperand(0), dl, MVT::i32);
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, ShAmt);
ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, ShAmt);
AmtVT = MVT::v4i32;
IsMasked = true;
} else if (ShAmt.getOpcode() == ISD::AND) {
// See if the shift amount is already masked (e.g. for rotation modulo),
// then we can zero-extend it by setting all the other mask elements to
// zero.
SmallVector<SDValue> MaskElts(
AmtVT.getVectorNumElements(),
DAG.getConstant(0, dl, AmtVT.getScalarType()));
MaskElts[0] = DAG.getAllOnesConstant(dl, AmtVT.getScalarType());
SDValue Mask = DAG.getBuildVector(AmtVT, dl, MaskElts);
if ((Mask = DAG.FoldConstantArithmetic(ISD::AND, dl, AmtVT,
{ShAmt.getOperand(1), Mask}))) {
ShAmt = DAG.getNode(ISD::AND, dl, AmtVT, ShAmt.getOperand(0), Mask);
IsMasked = true;
}
}
}
// Extract if the shift amount vector is larger than 128-bits.
if (AmtVT.getSizeInBits() > 128) {
ShAmt = extract128BitVector(ShAmt, 0, DAG, dl);
AmtVT = ShAmt.getSimpleValueType();
}
// Zero-extend bottom element to v2i64 vector type, either by extension or
// shuffle masking.
if (!IsMasked && AmtVT.getScalarSizeInBits() < 64) {
if (AmtVT == MVT::v4i32 && (ShAmt.getOpcode() == X86ISD::VBROADCAST ||
ShAmt.getOpcode() == X86ISD::VBROADCAST_LOAD)) {
ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, SDLoc(ShAmt), MVT::v4i32, ShAmt);
} else if (Subtarget.hasSSE41()) {
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
MVT::v2i64, ShAmt);
} else {
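// Pre-SSE41 fallback: zero the upper elements by byte-shifting the amount
// to the top of the 128-bit vector and back down again.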
SDValue ByteShift = DAG.getTargetConstant(
(128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
ByteShift);
ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
ByteShift);
}
}
// Change opcode to non-immediate version.
Opc = getTargetVShiftUniformOpcode(Opc, true);
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
/// Return Mask with the necessary casting or extending
/// for \p Mask according to \p MaskVT when lowering masking intrinsics
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl) {
if (isAllOnesConstant(Mask))
return DAG.getConstant(1, dl, MaskVT);
if (X86::isZeroNode(Mask))
return DAG.getConstant(0, dl, MaskVT);
assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!");
if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
assert(MaskVT == MVT::v64i1 && "Expected v64i1 mask!");
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
// In 32-bit mode, bitcasting an i64 is illegal; split it into two i32 halves.
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(0, dl, MVT::i32));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(1, dl, MVT::i32));
Lo = DAG.getBitcast(MVT::v32i1, Lo);
Hi = DAG.getBitcast(MVT::v32i1, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
} else {
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
// When MaskVT equals v2i1 or v4i1, the low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
}
}
/// Return (and \p Op, \p Mask) for compare instructions or
/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
/// necessary casting or extending for \p Mask when lowering masking intrinsics
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
unsigned OpcodeSelect = ISD::VSELECT;
SDLoc dl(Op);
if (isAllOnesConstant(Mask))
return Op;
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
}
/// Creates an SDNode for a predicated scalar operation.
/// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc).
/// The mask comes in as MVT::i8 and should be transformed
/// to MVT::v1i1 while lowering masking intrinsics.
/// The main difference between ScalarMaskingNode and VectorMaskingNode is using
/// "X86select" instead of "vselect". We just can't create the "vselect" node
/// for a scalar instruction.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
if (MaskConst->getZExtValue() & 0x1)
return Op;
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert(Mask.getValueType() == MVT::i8 && "Unexpected type");
SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
DAG.getBitcast(MVT::v8i1, Mask),
DAG.getIntPtrConstant(0, dl));
if (Op.getOpcode() == X86ISD::FSETCCM ||
Op.getOpcode() == X86ISD::FSETCCM_SAE ||
Op.getOpcode() == X86ISD::VFPCLASSS)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);
}
static int getSEHRegistrationNodeSize(const Function *Fn) {
if (!Fn->hasPersonalityFn())
report_fatal_error(
"querying registration node size for function without personality");
// The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
// WinEHStatePass for the full struct definition.
switch (classifyEHPersonality(Fn->getPersonalityFn())) {
case EHPersonality::MSVC_X86SEH: return 24;
case EHPersonality::MSVC_CXX: return 16;
default: break;
}
report_fatal_error(
"can only recover FP for 32-bit MSVC EH personality functions");
}
/// When the MSVC runtime transfers control to us, either to an outlined
/// function or when returning to a parent frame after catching an exception, we
/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
/// Here's the math:
/// RegNodeBase = EntryEBP - RegNodeSize
/// ParentFP = RegNodeBase - ParentFrameOffset
/// Subtracting RegNodeSize takes us to the offset of the registration node, and
/// subtracting the offset (negative on x86) takes us back to the parent FP.
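/// For example, with a 32-bit MSVC SEH personality the registration node is
/// 24 bytes (see getSEHRegistrationNodeSize), so
/// ParentFP = (EntryEBP - 24) - ParentFrameOffset.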
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
SDValue EntryEBP) {
MachineFunction &MF = DAG.getMachineFunction();
SDLoc dl;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// It's possible that the parent function no longer has a personality function
// if the exceptional code was optimized away, in which case we just return
// the incoming EBP.
if (!Fn->hasPersonalityFn())
return EntryEBP;
// Get an MCSymbol that will ultimately resolve to the frame offset of the EH
// registration, or the .set_setframe offset.
MCSymbol *OffsetSym =
MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()));
SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
SDValue ParentFrameOffset =
DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);
// Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after
// prologue to RBP in the parent function.
const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit())
return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);
int RegNodeSize = getSEHRegistrationNodeSize(Fn);
// RegNodeBase = EntryEBP - RegNodeSize
// ParentFP = RegNodeBase - ParentFrameOffset
SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
DAG.getConstant(RegNodeSize, dl, PtrVT));
return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}
SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
// Helper to detect if the operand is CUR_DIRECTION rounding mode.
auto isRoundModeCurDirection = [](SDValue Rnd) {
if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION;
return false;
};
auto isRoundModeSAE = [](SDValue Rnd) {
if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
unsigned RC = C->getZExtValue();
if (RC & X86::STATIC_ROUNDING::NO_EXC) {
// Clear the NO_EXC bit and check remaining bits.
RC ^= X86::STATIC_ROUNDING::NO_EXC;
// As a convenience we allow either no other bits set or explicitly
// the current direction.
return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION;
}
}
return false;
};
auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) {
if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
RC = C->getZExtValue();
if (RC & X86::STATIC_ROUNDING::NO_EXC) {
// Clear the NO_EXC bit and check remaining bits.
RC ^= X86::STATIC_ROUNDING::NO_EXC;
return RC == X86::STATIC_ROUNDING::TO_NEAREST_INT ||
RC == X86::STATIC_ROUNDING::TO_NEG_INF ||
RC == X86::STATIC_ROUNDING::TO_POS_INF ||
RC == X86::STATIC_ROUNDING::TO_ZERO;
}
}
return false;
};
SDLoc dl(Op);
unsigned IntNo = Op.getConstantOperandVal(0);
MVT VT = Op.getSimpleValueType();
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
// Propagate flags from original node to transformed node(s).
SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags());
if (IntrData) {
switch(IntrData->Type) {
case INTR_TYPE_1OP: {
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(2);
unsigned RC = 0;
if (isRoundModeSAEToX(Rnd, RC))
return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
Op.getOperand(1),
DAG.getTargetConstant(RC, dl, MVT::i32));
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1));
}
case INTR_TYPE_1OP_SAE: {
SDValue Sae = Op.getOperand(2);
unsigned Opc;
if (isRoundModeCurDirection(Sae))
Opc = IntrData->Opc0;
else if (isRoundModeSAE(Sae))
Opc = IntrData->Opc1;
else
return SDValue();
return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1));
}
case INTR_TYPE_2OP: {
SDValue Src2 = Op.getOperand(2);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(3);
unsigned RC = 0;
if (isRoundModeSAEToX(Rnd, RC))
return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
Op.getOperand(1), Src2,
DAG.getTargetConstant(RC, dl, MVT::i32));
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Src2);
}
case INTR_TYPE_2OP_SAE: {
SDValue Sae = Op.getOperand(3);
unsigned Opc;
if (isRoundModeCurDirection(Sae))
Opc = IntrData->Opc0;
else if (isRoundModeSAE(Sae))
Opc = IntrData->Opc1;
else
return SDValue();
return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case INTR_TYPE_3OP:
case INTR_TYPE_3OP_IMM8: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
if (IntrData->Type == INTR_TYPE_3OP_IMM8 &&
Src3.getValueType() != MVT::i8) {
Src3 = DAG.getTargetConstant(
cast<ConstantSDNode>(Src3)->getZExtValue() & 0xff, dl, MVT::i8);
}
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
unsigned RC = 0;
if (isRoundModeSAEToX(Rnd, RC))
return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
Src1, Src2, Src3,
DAG.getTargetConstant(RC, dl, MVT::i32));
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
{Src1, Src2, Src3});
}
case INTR_TYPE_4OP_IMM8: {
assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant);
SDValue Src4 = Op.getOperand(4);
if (Src4.getValueType() != MVT::i8) {
Src4 = DAG.getTargetConstant(
cast<ConstantSDNode>(Src4)->getZExtValue() & 0xff, dl, MVT::i8);
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Src4);
}
case INTR_TYPE_1OP_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
// We add rounding mode to the Node when
// - RC Opcode is specified and
// - RC is not "current direction".
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
unsigned RC = 0;
if (isRoundModeSAEToX(Rnd, RC))
return getVectorMaskingNode(
DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
Src, DAG.getTargetConstant(RC, dl, MVT::i32)),
Mask, PassThru, Subtarget, DAG);
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
return getVectorMaskingNode(
DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru,
Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK_SAE: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue Rnd = Op.getOperand(4);
unsigned Opc;
if (isRoundModeCurDirection(Rnd))
Opc = IntrData->Opc0;
else if (isRoundModeSAE(Rnd))
Opc = IntrData->Opc1;
else
return SDValue();
return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru,
Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue passThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
// There are 2 kinds of intrinsics in this group:
// (1) With suppress-all-exceptions (sae) or rounding mode - 6 operands
// (2) With rounding mode and sae - 7 operands.
bool HasRounding = IntrWithRoundingModeOpcode != 0;
if (Op.getNumOperands() == (5U + HasRounding)) {
if (HasRounding) {
SDValue Rnd = Op.getOperand(5);
unsigned RC = 0;
if (isRoundModeSAEToX(Rnd, RC))
return getScalarMaskingNode(
DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2,
DAG.getTargetConstant(RC, dl, MVT::i32)),
Mask, passThru, Subtarget, DAG);
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
Src2),
Mask, passThru, Subtarget, DAG);
}
assert(Op.getNumOperands() == (6U + HasRounding) &&
"Unexpected intrinsic form");
SDValue RoundingMode = Op.getOperand(5);
unsigned Opc = IntrData->Opc0;
if (HasRounding) {
SDValue Sae = Op.getOperand(6);
if (isRoundModeSAE(Sae))
Opc = IntrWithRoundingModeOpcode;
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1,
Src2, RoundingMode),
Mask, passThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RND: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue passThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue Rnd = Op.getOperand(5);
SDValue NewOp;
unsigned RC = 0;
if (isRoundModeCurDirection(Rnd))
NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);
else if (isRoundModeSAEToX(Rnd, RC))
NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,
DAG.getTargetConstant(RC, dl, MVT::i32));
else
return SDValue();
return getScalarMaskingNode(NewOp, Mask, passThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue passThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue Sae = Op.getOperand(5);
unsigned Opc;
if (isRoundModeCurDirection(Sae))
Opc = IntrData->Opc0;
else if (isRoundModeSAE(Sae))
Opc = IntrData->Opc1;
else
return SDValue();
return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
Mask, passThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue NewOp;
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
unsigned RC = 0;
if (isRoundModeSAEToX(Rnd, RC))
NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,
DAG.getTargetConstant(RC, dl, MVT::i32));
else if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
if (!NewOp)
NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);
return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
unsigned Opc = IntrData->Opc0;
if (IntrData->Opc1 != 0) {
SDValue Sae = Op.getOperand(5);
if (isRoundModeSAE(Sae))
Opc = IntrData->Opc1;
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_SCALAR_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Sae = Op.getOperand(6);
unsigned Opc;
if (isRoundModeCurDirection(Sae))
Opc = IntrData->Opc0;
else if (isRoundModeSAE(Sae))
Opc = IntrData->Opc1;
else
return SDValue();
return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
unsigned Opc = IntrData->Opc0;
if (IntrData->Opc1 != 0) {
SDValue Sae = Op.getOperand(6);
if (isRoundModeSAE(Sae))
Opc = IntrData->Opc1;
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
case BLENDV: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
EVT MaskVT = Src3.getValueType().changeVectorElementTypeToInteger();
Src3 = DAG.getBitcast(MaskVT, Src3);
// Reverse the operands to match VSELECT order.
return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1);
}
case VPERM_2OP : {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
// Swap Src1 and Src2 in the node creation
return DAG.getNode(IntrData->Opc0, dl, VT, Src2, Src1);
}
case CFMA_OP_MASKZ:
case CFMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = Src3;
if (IntrData->Type == CFMA_OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
// We add rounding mode to the Node when
// - RC Opcode is specified and
// - RC is not "current direction".
SDValue NewOp;
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
unsigned RC = 0;
if (isRoundModeSAEToX(Rnd, RC))
NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Src3,
DAG.getTargetConstant(RC, dl, MVT::i32));
else if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
if (!NewOp)
NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3);
return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
}
case IFMA_OP:
// NOTE: We need to swizzle the operands to pass the multiply operands
// first.
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case FPCLASSS: {
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm);
SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(),
Subtarget, DAG);
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
DAG.getConstant(0, dl, MVT::v8i1),
FPclassMask, DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(MVT::i8, Ins);
}
case CMP_MASK_CC: {
MVT MaskVT = Op.getSimpleValueType();
SDValue CC = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
if (IntrData->Opc1 != 0) {
SDValue Sae = Op.getOperand(5);
if (isRoundModeSAE(Sae))
return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC, Mask, Sae);
if (!isRoundModeCurDirection(Sae))
return SDValue();
}
// Default rounding mode.
return DAG.getNode(IntrData->Opc0, dl, MaskVT,
{Op.getOperand(1), Op.getOperand(2), CC, Mask});
}
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue CC = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue Cmp;
if (IntrData->Opc1 != 0) {
SDValue Sae = Op.getOperand(5);
if (isRoundModeSAE(Sae))
Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae);
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
// Default rounding mode.
if (!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);
SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(),
Subtarget, DAG);
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
DAG.getConstant(0, dl, MVT::v8i1),
CmpMask, DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(MVT::i8, Ins);
}
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
// Some conditions require the operands to be swapped.
if (CC == ISD::SETLT || CC == ISD::SETLE)
std::swap(LHS, RHS);
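// After the swap, LT/LE are handled as GT/GE, so only the unsigned
// above/above-equal conditions (A/AE) are needed below.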
SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
SDValue SetCC;
switch (CC) {
case ISD::SETEQ: { // (ZF = 0 and PF = 0)
SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
break;
}
case ISD::SETNE: { // (ZF = 1 or PF = 1)
SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
break;
}
case ISD::SETGT: // (CF = 0 and ZF = 0)
case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
break;
}
case ISD::SETGE: // CF = 0
case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
break;
default:
llvm_unreachable("Unexpected illegal condition!");
}
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case COMI_RM: { // Comparison intrinsics with Sae
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned CondVal = Op.getConstantOperandVal(3);
SDValue Sae = Op.getOperand(4);
SDValue FCmp;
if (isRoundModeCurDirection(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
DAG.getTargetConstant(CondVal, dl, MVT::i8));
else if (isRoundModeSAE(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS,
DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae);
else
return SDValue();
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
DAG.getConstant(0, dl, MVT::v16i1),
FCmp, DAG.getIntPtrConstant(0, dl));
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32,
DAG.getBitcast(MVT::i16, Ins));
}
case VSHIFT: {
SDValue SrcOp = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
assert(ShAmt.getValueType() == MVT::i32 &&
"Unexpected VSHIFT amount type");
// Catch shift-by-constant.
if (auto *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
return getTargetVShiftByConstNode(IntrData->Opc0, dl,
Op.getSimpleValueType(), SrcOp,
CShAmt->getZExtValue(), DAG);
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, ShAmt);
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
SrcOp, ShAmt, 0, Subtarget, DAG);
}
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
if (ISD::isBuildVectorAllOnes(Mask.getNode())) // return data as is
return Op.getOperand(1);
// Avoid false dependency.
if (PassThru.isUndef())
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru,
Mask);
}
case FIXUPIMM:
case FIXUPIMM_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Imm = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Passthru = (IntrData->Type == FIXUPIMM)
? Src1
: getZeroVector(VT, Subtarget, DAG, dl);
unsigned Opc = IntrData->Opc0;
if (IntrData->Opc1 != 0) {
SDValue Sae = Op.getOperand(6);
if (isRoundModeSAE(Sae))
Opc = IntrData->Opc1;
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm);
if (Opc == X86ISD::VFIXUPIMM || Opc == X86ISD::VFIXUPIMM_SAE)
return getVectorMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG);
return getScalarMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG);
}
case ROUNDP: {
assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
// Clear the upper bits of the rounding immediate so that the legacy
// intrinsic can't trigger the scaling behavior of VRNDSCALE.
auto Round = cast<ConstantSDNode>(Op.getOperand(2));
SDValue RoundingMode =
DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), RoundingMode);
}
case ROUNDS: {
assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode");
// Clear the upper bits of the rounding immediate so that the legacy
// intrinsic can't trigger the scaling behavior of VRNDSCALE.
auto Round = cast<ConstantSDNode>(Op.getOperand(3));
SDValue RoundingMode =
DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), RoundingMode);
}
case BEXTRI: {
assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode");
uint64_t Imm = Op.getConstantOperandVal(2);
SDValue Control = DAG.getTargetConstant(Imm & 0xffff, dl,
Op.getValueType());
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Control);
}
// ADC/ADCX/SBB
case ADX: {
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32);
SDValue Res;
// If the carry in is zero, then we should just use ADD/SUB instead of
// ADC/SBB.
if (isNullConstant(Op.getOperand(1))) {
Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2),
Op.getOperand(3));
} else {
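// Materialize CF from the carry-in byte: adding -1 (0xFF) sets CF exactly
// when the carry-in is nonzero; the ADC/SBB below then consumes it.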
SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1),
DAG.getConstant(-1, dl, MVT::i8));
Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2),
Op.getOperand(3), GenCF.getValue(1));
}
SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
SDValue Results[] = { SetCC, Res };
return DAG.getMergeValues(Results, dl);
}
case CVTPD2PS_MASK:
case CVTPD2DQ_MASK:
case CVTQQ2PS_MASK:
case TRUNCATE_TO_REG: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
if (isAllOnesConstant(Mask))
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
MVT SrcVT = Src.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(),
{Src, PassThru, Mask});
}
case CVTPS2PH_MASK: {
SDValue Src = Op.getOperand(1);
SDValue Rnd = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
unsigned RC = 0;
unsigned Opc = IntrData->Opc0;
bool SAE = Src.getValueType().is512BitVector() &&
(isRoundModeSAEToX(Rnd, RC) || isRoundModeSAE(Rnd));
if (SAE) {
Opc = X86ISD::CVTPS2PH_SAE;
Rnd = DAG.getTargetConstant(RC, dl, MVT::i32);
}
if (isAllOnesConstant(Mask))
return DAG.getNode(Opc, dl, Op.getValueType(), Src, Rnd);
if (SAE)
Opc = X86ISD::MCVTPS2PH_SAE;
else
Opc = IntrData->Opc1;
MVT SrcVT = Src.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getNode(Opc, dl, Op.getValueType(), Src, Rnd, PassThru, Mask);
}
case CVTNEPS2BF16_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
if (ISD::isBuildVectorAllOnes(Mask.getNode()))
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
// Break false dependency.
if (PassThru.isUndef())
PassThru = DAG.getConstant(0, dl, PassThru.getValueType());
return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
Mask);
}
default:
break;
}
}
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
// ptest and testp intrinsics. The intrinsics these come from are designed to
// return an integer value, not just an instruction, so lower them to the
// ptest or testp pattern and a setcc for the result.
case Intrinsic::x86_avx512_ktestc_b:
case Intrinsic::x86_avx512_ktestc_w:
case Intrinsic::x86_avx512_ktestc_d:
case Intrinsic::x86_avx512_ktestc_q:
case Intrinsic::x86_avx512_ktestz_b:
case Intrinsic::x86_avx512_ktestz_w:
case Intrinsic::x86_avx512_ktestz_d:
case Intrinsic::x86_avx512_ktestz_q:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestz_256:
case Intrinsic::x86_avx_ptestc_256:
case Intrinsic::x86_avx_ptestnzc_256:
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
unsigned TestOpc = X86ISD::PTEST;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx512_ktestc_b:
case Intrinsic::x86_avx512_ktestc_w:
case Intrinsic::x86_avx512_ktestc_d:
case Intrinsic::x86_avx512_ktestc_q:
// CF = 1
TestOpc = X86ISD::KTEST;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_avx512_ktestz_b:
case Intrinsic::x86_avx512_ktestz_w:
case Intrinsic::x86_avx512_ktestz_d:
case Intrinsic::x86_avx512_ktestz_q:
TestOpc = X86ISD::KTEST;
X86CC = X86::COND_E;
break;
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
TestOpc = X86ISD::TESTP;
[[fallthrough]];
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
// ZF = 1
X86CC = X86::COND_E;
break;
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
TestOpc = X86ISD::TESTP;
[[fallthrough]];
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
// CF = 1
X86CC = X86::COND_B;
break;
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
TestOpc = X86ISD::TESTP;
[[fallthrough]];
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0
X86CC = X86::COND_A;
break;
}
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
case Intrinsic::x86_sse42_pcmpestric128:
case Intrinsic::x86_sse42_pcmpistrio128:
case Intrinsic::x86_sse42_pcmpestrio128:
case Intrinsic::x86_sse42_pcmpistris128:
case Intrinsic::x86_sse42_pcmpestris128:
case Intrinsic::x86_sse42_pcmpistriz128:
case Intrinsic::x86_sse42_pcmpestriz128: {
unsigned Opcode;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse42_pcmpistria128:
Opcode = X86ISD::PCMPISTR;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpestria128:
Opcode = X86ISD::PCMPESTR;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpistric128:
Opcode = X86ISD::PCMPISTR;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpestric128:
Opcode = X86ISD::PCMPESTR;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpistrio128:
Opcode = X86ISD::PCMPISTR;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpestrio128:
Opcode = X86ISD::PCMPESTR;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpistris128:
Opcode = X86ISD::PCMPISTR;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpestris128:
Opcode = X86ISD::PCMPESTR;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpistriz128:
Opcode = X86ISD::PCMPISTR;
X86CC = X86::COND_E;
break;
case Intrinsic::x86_sse42_pcmpestriz128:
Opcode = X86ISD::PCMPESTR;
X86CC = X86::COND_E;
break;
}
SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32);
SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2);
SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_sse42_pcmpistri128:
case Intrinsic::x86_sse42_pcmpestri128: {
unsigned Opcode;
if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
Opcode = X86ISD::PCMPISTR;
else
Opcode = X86ISD::PCMPESTR;
SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
case Intrinsic::x86_sse42_pcmpistrm128:
case Intrinsic::x86_sse42_pcmpestrm128: {
unsigned Opcode;
if (IntNo == Intrinsic::x86_sse42_pcmpistrm128)
Opcode = X86ISD::PCMPISTR;
else
Opcode = X86ISD::PCMPESTR;
SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps).getValue(1);
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
auto &Context = MF.getMMI().getContext();
MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
Twine(MF.getFunctionNumber()));
return DAG.getNode(getGlobalWrapperKind(), dl, VT,
DAG.getMCSymbol(S, PtrVT));
}
case Intrinsic::x86_seh_lsda: {
// Compute the symbol for the LSDA. We know it'll get emitted later.
MachineFunction &MF = DAG.getMachineFunction();
SDValue Op1 = Op.getOperand(1);
auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()));
// Generate a simple absolute symbol reference. This intrinsic is only
// supported on 32-bit Windows, which isn't PIC.
SDValue Result = DAG.getMCSymbol(LSDASym, VT);
return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
}
case Intrinsic::eh_recoverfp: {
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.eh.recoverfp must take a function as the first argument");
return recoverFramePointer(DAG, Fn, IncomingFPOp);
}
case Intrinsic::localaddress: {
// Returns one of the stack, base, or frame pointer registers, depending on
// which is used to reference local variables.
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned Reg;
if (RegInfo->hasBasePointer(MF))
Reg = RegInfo->getBaseRegister();
else { // Handles the SP or FP case.
bool CantUseFP = RegInfo->hasStackRealignment(MF);
if (CantUseFP)
Reg = RegInfo->getPtrSizedStackRegister(MF);
else
Reg = RegInfo->getPtrSizedFrameRegister(MF);
}
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
case Intrinsic::x86_avx512_vp2intersect_q_512:
case Intrinsic::x86_avx512_vp2intersect_q_256:
case Intrinsic::x86_avx512_vp2intersect_q_128:
case Intrinsic::x86_avx512_vp2intersect_d_512:
case Intrinsic::x86_avx512_vp2intersect_d_256:
case Intrinsic::x86_avx512_vp2intersect_d_128: {
MVT MaskVT = Op.getSimpleValueType();
SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other);
SDLoc DL(Op);
SDValue Operation =
DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs,
Op->getOperand(1), Op->getOperand(2));
SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL,
MaskVT, Operation);
SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL,
MaskVT, Operation);
return DAG.getMergeValues({Result0, Result1}, DL);
}
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
case Intrinsic::x86_mmx_psrli_w:
case Intrinsic::x86_mmx_psrli_d:
case Intrinsic::x86_mmx_psrli_q:
case Intrinsic::x86_mmx_psrai_w:
case Intrinsic::x86_mmx_psrai_d: {
SDLoc DL(Op);
SDValue ShAmt = Op.getOperand(2);
// If the argument is a constant, convert it to a target constant.
if (auto *C = dyn_cast<ConstantSDNode>(ShAmt)) {
// Clamp out-of-bounds shift amounts, since they would otherwise be masked
// to 8 bits, which could make them appear in bounds again.
unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255);
if (ShiftAmount == 0)
return Op.getOperand(1);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1),
DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));
}
unsigned NewIntrinsic;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_mmx_pslli_w:
NewIntrinsic = Intrinsic::x86_mmx_psll_w;
break;
case Intrinsic::x86_mmx_pslli_d:
NewIntrinsic = Intrinsic::x86_mmx_psll_d;
break;
case Intrinsic::x86_mmx_pslli_q:
NewIntrinsic = Intrinsic::x86_mmx_psll_q;
break;
case Intrinsic::x86_mmx_psrli_w:
NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
break;
case Intrinsic::x86_mmx_psrli_d:
NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
break;
case Intrinsic::x86_mmx_psrli_q:
NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
break;
case Intrinsic::x86_mmx_psrai_w:
NewIntrinsic = Intrinsic::x86_mmx_psra_w;
break;
case Intrinsic::x86_mmx_psrai_d:
NewIntrinsic = Intrinsic::x86_mmx_psra_d;
break;
}
// The vector shift intrinsics with scalars use 32-bit shift amounts, but
// the SSE2/MMX shift instructions read 64 bits. Copy the 32 bits to an
// MMX register.
ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
DAG.getTargetConstant(NewIntrinsic, DL,
getPointerTy(DAG.getDataLayout())),
Op.getOperand(1), ShAmt);
}
case Intrinsic::thread_pointer: {
if (Subtarget.isTargetELF()) {
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(
*DAG.getContext(), Subtarget.is64Bit() ? X86AS::FS : X86AS::GS));
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getIntPtrConstant(0, dl), MachinePointerInfo(Ptr));
}
report_fatal_error(
"Target OS doesn't support __builtin_thread_pointer() yet.");
}
}
}
static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
TLI.getPointerTy(DAG.getDataLayout()));
EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
// Cast mask to an integer type.
Mask = DAG.getBitcast(MaskVT, Mask);
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
SDValue Res =
DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops,
MemIntr->getMemoryVT(), MemIntr->getMemOperand());
return DAG.getMergeValues({Res, Res.getValue(1)}, dl);
}
static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
TLI.getPointerTy(DAG.getDataLayout()));
unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
VT.getVectorNumElements());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
// We support two versions of the gather intrinsics. One with scalar mask and
// one with vXi1 mask. Convert scalar to vXi1 if necessary.
if (Mask.getValueType() != MaskVT)
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
SDValue Res =
DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops,
MemIntr->getMemoryVT(), MemIntr->getMemOperand());
return DAG.getMergeValues({Res, Res.getValue(1)}, dl);
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
TLI.getPointerTy(DAG.getDataLayout()));
unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
Src.getSimpleValueType().getVectorNumElements());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
// We support two versions of the scatter intrinsics. One with scalar mask and
// one with vXi1 mask. Convert scalar to vXi1 if necessary.
if (Mask.getValueType() != MaskVT)
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
SDValue Res =
DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
MemIntr->getMemoryVT(), MemIntr->getMemOperand());
return Res;
}
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Mask, SDValue Base, SDValue Index,
SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
MVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
/// Handles the lowering of builtin intrinsics with chain that return their
/// value into registers EDX:EAX.
/// If operand SrcReg is a valid register identifier, then operand 2 of N is
/// copied to SrcReg. The assumption is that SrcReg is an implicit input to
/// TargetOpcode.
/// Returns a Glue value which can be used to add extra copy-from-reg if the
/// expanded intrinsics implicitly defines extra registers (i.e. not just
/// EDX:EAX).
static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
unsigned TargetOpcode,
unsigned SrcReg,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
SDValue Chain = N->getOperand(0);
SDValue Glue;
if (SrcReg) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue);
Glue = Chain.getValue(1);
}
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue N1Ops[] = {Chain, Glue};
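// Only pass the glue operand to the machine node if the CopyToReg above
// produced one.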
SDNode *N1 = DAG.getMachineNode(
TargetOpcode, DL, Tys, ArrayRef<SDValue>(N1Ops, Glue.getNode() ? 2 : 1));
Chain = SDValue(N1, 0);
// The expanded instruction returns its 64-bit result in registers EDX:EAX.
SDValue LO, HI;
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
Chain = HI.getValue(1);
Glue = HI.getValue(2);
if (Subtarget.is64Bit()) {
// Merge the two 32-bit values into a 64-bit one.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return Glue;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { LO, HI };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
return Glue;
}
/// Handles the lowering of builtin intrinsics that read the time stamp counter
/// (x86_rdtsc and x86_rdtscp). This function is also used to custom lower
/// READCYCLECOUNTER nodes.
static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
// The processor's time-stamp counter (a 64-bit MSR) is stored into the
// EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
// and the EAX register is loaded with the low-order 32 bits.
SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
/* NoRegister */0, Subtarget,
Results);
if (Opcode != X86::RDTSCP)
return;
SDValue Chain = Results[1];
// The RDTSCP instruction loads the IA32_TSC_AUX MSR (address C000_0103H) into
// the ECX register. Add 'ecx' explicitly to the chain.
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, Glue);
Results[1] = ecx;
Results.push_back(ecx.getValue(1));
}
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<SDValue, 3> Results;
SDLoc DL(Op);
getReadTimeStampCounter(Op.getNode(), DL, X86::RDTSC, DAG, Subtarget,
Results);
return DAG.getMergeValues(Results, DL);
}
static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue RegNode = Op.getOperand(2);
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
if (!EHInfo)
report_fatal_error("EH registrations only live in functions using WinEH");
// Cast the operand to an alloca, and remember the frame index.
auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode);
if (!FINode)
report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca");
EHInfo->EHRegNodeFrameIndex = FINode->getIndex();
// Return the chain operand without making any DAG nodes.
return Chain;
}
static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue EHGuard = Op.getOperand(2);
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
if (!EHInfo)
report_fatal_error("EHGuard only live in functions using WinEH");
// Cast the operand to an alloca, and remember the frame index.
auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard);
if (!FINode)
report_fatal_error("llvm.x86.seh.ehguard expects a static alloca");
EHInfo->EHGuardFrameIndex = FINode->getIndex();
// Return the chain operand without making any DAG nodes.
return Chain;
}
/// Emit Truncating Store with signed or unsigned saturation.
static SDValue
EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
unsigned Opc = SignedSat ? X86ISD::VTRUNCSTORES : X86ISD::VTRUNCSTOREUS;
return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO);
}
/// Emit Masked Truncating Store with signed or unsigned saturation.
static SDValue
EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = { Chain, Val, Ptr, Mask };
unsigned Opc = SignedSat ? X86ISD::VMTRUNCSTORES : X86ISD::VMTRUNCSTOREUS;
return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO);
}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = Op.getConstantOperandVal(1);
const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
switch (IntNo) {
case Intrinsic::swift_async_context_addr: {
SDLoc dl(Op);
auto &MF = DAG.getMachineFunction();
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
MF.getFrameInfo().setFrameAddressIsTaken(true);
X86FI->setHasSwiftAsyncContext(true);
SDValue Chain = Op->getOperand(0);
SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64);
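// The Swift async context lives in the slot just below the saved frame
// pointer, so the address is RBP - 8.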
SDValue Result =
SDValue(DAG.getMachineNode(X86::SUB64ri8, dl, MVT::i64, CopyRBP,
DAG.getTargetConstant(8, dl, MVT::i32)),
0);
// Return { result, chain }.
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
CopyRBP.getValue(1));
} else {
// 32-bit so no special extended frame, create or reuse an existing
// stack slot.
if (!X86FI->getSwiftAsyncContextFrameIdx())
X86FI->setSwiftAsyncContextFrameIdx(
MF.getFrameInfo().CreateStackObject(4, Align(4), false));
SDValue Result =
DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
// Return { result, chain }.
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
Op->getOperand(0));
}
}
case llvm::Intrinsic::x86_seh_ehregnode:
return MarkEHRegistrationNode(Op, DAG);
case llvm::Intrinsic::x86_seh_ehguard:
return MarkEHGuard(Op, DAG);
case llvm::Intrinsic::x86_rdpkru: {
SDLoc dl(Op);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
// Create a RDPKRU node and pass 0 to the ECX parameter.
return DAG.getNode(X86ISD::RDPKRU, dl, VTs, Op.getOperand(0),
DAG.getConstant(0, dl, MVT::i32));
}
case llvm::Intrinsic::x86_wrpkru: {
SDLoc dl(Op);
// Create a WRPKRU node, pass the input to the EAX parameter, and pass 0
// to the EDX and ECX parameters.
return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other,
Op.getOperand(0), Op.getOperand(2),
DAG.getConstant(0, dl, MVT::i32),
DAG.getConstant(0, dl, MVT::i32));
}
case llvm::Intrinsic::asan_check_memaccess: {
// Mark this as adjustsStack because it will be lowered to a call.
DAG.getMachineFunction().getFrameInfo().setAdjustsStack(true);
// Don't do anything here, we will expand these intrinsics out later.
return Op;
}
case llvm::Intrinsic::x86_flags_read_u32:
case llvm::Intrinsic::x86_flags_read_u64:
case llvm::Intrinsic::x86_flags_write_u32:
case llvm::Intrinsic::x86_flags_write_u64: {
// We need a frame pointer because this will get lowered to a PUSH/POP
// sequence.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setHasCopyImplyingStackAdjustment(true);
// Don't do anything here, we will expand these intrinsics out later
// during FinalizeISel in EmitInstrWithCustomInserter.
return Op;
}
case Intrinsic::x86_lwpins32:
case Intrinsic::x86_lwpins64:
case Intrinsic::x86_umwait:
case Intrinsic::x86_tpause: {
SDLoc dl(Op);
SDValue Chain = Op->getOperand(0);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_umwait:
Opcode = X86ISD::UMWAIT;
break;
case Intrinsic::x86_tpause:
Opcode = X86ISD::TPAUSE;
break;
case Intrinsic::x86_lwpins32:
case Intrinsic::x86_lwpins64:
Opcode = X86ISD::LWPINS;
break;
}
SDValue Operation =
DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
Op->getOperand(3), Op->getOperand(4));
SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
case Intrinsic::x86_enqcmd:
case Intrinsic::x86_enqcmds: {
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic!");
case Intrinsic::x86_enqcmd:
Opcode = X86ISD::ENQCMD;
break;
case Intrinsic::x86_enqcmds:
Opcode = X86ISD::ENQCMDS;
break;
}
SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op.getOperand(2),
Op.getOperand(3));
SDValue SetCC = getSETCC(X86::COND_E, Operation.getValue(0), dl, DAG);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
case Intrinsic::x86_aesenc128kl:
case Intrinsic::x86_aesdec128kl:
case Intrinsic::x86_aesenc256kl:
case Intrinsic::x86_aesdec256kl: {
SDLoc DL(Op);
SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::i32, MVT::Other);
SDValue Chain = Op.getOperand(0);
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_aesenc128kl:
Opcode = X86ISD::AESENC128KL;
break;
case Intrinsic::x86_aesdec128kl:
Opcode = X86ISD::AESDEC128KL;
break;
case Intrinsic::x86_aesenc256kl:
Opcode = X86ISD::AESENC256KL;
break;
case Intrinsic::x86_aesdec256kl:
Opcode = X86ISD::AESDEC256KL;
break;
}
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
MachineMemOperand *MMO = MemIntr->getMemOperand();
EVT MemVT = MemIntr->getMemoryVT();
SDValue Operation = DAG.getMemIntrinsicNode(
Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3)}, MemVT,
MMO);
SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG);
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
{ZF, Operation.getValue(0), Operation.getValue(2)});
}
case Intrinsic::x86_aesencwide128kl:
case Intrinsic::x86_aesdecwide128kl:
case Intrinsic::x86_aesencwide256kl:
case Intrinsic::x86_aesdecwide256kl: {
SDLoc DL(Op);
SDVTList VTs = DAG.getVTList(
{MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64,
MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other});
SDValue Chain = Op.getOperand(0);
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_aesencwide128kl:
Opcode = X86ISD::AESENCWIDE128KL;
break;
case Intrinsic::x86_aesdecwide128kl:
Opcode = X86ISD::AESDECWIDE128KL;
break;
case Intrinsic::x86_aesencwide256kl:
Opcode = X86ISD::AESENCWIDE256KL;
break;
case Intrinsic::x86_aesdecwide256kl:
Opcode = X86ISD::AESDECWIDE256KL;
break;
}
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
MachineMemOperand *MMO = MemIntr->getMemOperand();
EVT MemVT = MemIntr->getMemoryVT();
SDValue Operation = DAG.getMemIntrinsicNode(
Opcode, DL, VTs,
{Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
Op.getOperand(5), Op.getOperand(6), Op.getOperand(7),
Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)},
MemVT, MMO);
SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG);
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
{ZF, Operation.getValue(1), Operation.getValue(2),
Operation.getValue(3), Operation.getValue(4),
Operation.getValue(5), Operation.getValue(6),
Operation.getValue(7), Operation.getValue(8),
Operation.getValue(9)});
}
case Intrinsic::x86_testui: {
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain);
SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
case Intrinsic::x86_atomic_bts_rm:
case Intrinsic::x86_atomic_btc_rm:
case Intrinsic::x86_atomic_btr_rm: {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue Chain = Op.getOperand(0);
SDValue Op1 = Op.getOperand(2);
SDValue Op2 = Op.getOperand(3);
unsigned Opc = IntNo == Intrinsic::x86_atomic_bts_rm ? X86ISD::LBTS_RM
: IntNo == Intrinsic::x86_atomic_btc_rm ? X86ISD::LBTC_RM
: X86ISD::LBTR_RM;
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
SDValue Res =
DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
{Chain, Op1, Op2}, VT, MMO);
Chain = Res.getValue(1);
Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);
}
case Intrinsic::x86_atomic_bts:
case Intrinsic::x86_atomic_btc:
case Intrinsic::x86_atomic_btr: {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue Chain = Op.getOperand(0);
SDValue Op1 = Op.getOperand(2);
SDValue Op2 = Op.getOperand(3);
unsigned Opc = IntNo == Intrinsic::x86_atomic_bts ? X86ISD::LBTS
: IntNo == Intrinsic::x86_atomic_btc ? X86ISD::LBTC
: X86ISD::LBTR;
SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32);
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
SDValue Res =
DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
{Chain, Op1, Op2, Size}, VT, MMO);
Chain = Res.getValue(1);
Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
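// The setcc yields the tested bit as 0/1 in bit 0; shift it back to bit
// position Imm so the returned value has the bit in its original place.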
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
if (Imm)
Res = DAG.getNode(ISD::SHL, DL, VT, Res,
DAG.getShiftAmountConstant(Imm, VT, DL));
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);
}
case Intrinsic::x86_cmpccxadd32:
case Intrinsic::x86_cmpccxadd64: {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(2);
SDValue Src1 = Op.getOperand(3);
SDValue Src2 = Op.getOperand(4);
SDValue CC = Op.getOperand(5);
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
SDValue Operation = DAG.getMemIntrinsicNode(
X86ISD::CMPCCXADD, DL, Op->getVTList(), {Chain, Addr, Src1, Src2, CC},
MVT::i32, MMO);
return Operation;
}
case Intrinsic::x86_aadd32:
case Intrinsic::x86_aadd64:
case Intrinsic::x86_aand32:
case Intrinsic::x86_aand64:
case Intrinsic::x86_aor32:
case Intrinsic::x86_aor64:
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64: {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Op1 = Op.getOperand(2);
SDValue Op2 = Op.getOperand(3);
MVT VT = Op2.getSimpleValueType();
unsigned Opc = 0;
switch (IntNo) {
default:
llvm_unreachable("Unknown Intrinsic");
case Intrinsic::x86_aadd32:
case Intrinsic::x86_aadd64:
Opc = X86ISD::AADD;
break;
case Intrinsic::x86_aand32:
case Intrinsic::x86_aand64:
Opc = X86ISD::AAND;
break;
case Intrinsic::x86_aor32:
case Intrinsic::x86_aor64:
Opc = X86ISD::AOR;
break;
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64:
Opc = X86ISD::AXOR;
break;
}
MachineMemOperand *MMO = cast<MemSDNode>(Op)->getMemOperand();
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(),
{Chain, Op1, Op2}, VT, MMO);
}
case Intrinsic::x86_atomic_add_cc:
case Intrinsic::x86_atomic_sub_cc:
case Intrinsic::x86_atomic_or_cc:
case Intrinsic::x86_atomic_and_cc:
case Intrinsic::x86_atomic_xor_cc: {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Op1 = Op.getOperand(2);
SDValue Op2 = Op.getOperand(3);
X86::CondCode CC = (X86::CondCode)Op.getConstantOperandVal(4);
MVT VT = Op2.getSimpleValueType();
unsigned Opc = 0;
switch (IntNo) {
default:
llvm_unreachable("Unknown Intrinsic");
case Intrinsic::x86_atomic_add_cc:
Opc = X86ISD::LADD;
break;
case Intrinsic::x86_atomic_sub_cc:
Opc = X86ISD::LSUB;
break;
case Intrinsic::x86_atomic_or_cc:
Opc = X86ISD::LOR;
break;
case Intrinsic::x86_atomic_and_cc:
Opc = X86ISD::LAND;
break;
case Intrinsic::x86_atomic_xor_cc:
Opc = X86ISD::LXOR;
break;
}
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
SDValue LockArith =
DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
{Chain, Op1, Op2}, VT, MMO);
Chain = LockArith.getValue(1);
return DAG.getMergeValues({getSETCC(CC, LockArith, DL, DAG), Chain}, DL);
}
}
return SDValue();
}
SDLoc dl(Op);
switch(IntrData->Type) {
default: llvm_unreachable("Unknown Intrinsic Type");
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32, MVT::Other);
SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from Rand, which is always 0, cast to i32.
SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
DAG.getConstant(1, dl, Op->getValueType(1)),
DAG.getTargetConstant(X86::COND_B, dl, MVT::i8),
SDValue(Result.getNode(), 1)};
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops);
// Return { result, isValid, chain }.
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
case GATHER_AVX2: {
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
}
case GATHER: {
// gather(v1, mask, index, base, scale);
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
Chain, Subtarget);
}
case SCATTER: {
// scatter(base, mask, index, v1, scale);
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
}
case PREFETCH: {
const APInt &HintVal = Op.getConstantOperandAPInt(6);
assert((HintVal == 2 || HintVal == 3) &&
"Wrong prefetch hint in intrinsic: should be 2 or 3");
unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
SDValue Base = Op.getOperand(4);
SDValue Scale = Op.getOperand(5);
return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain,
Subtarget);
}
// Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
case RDTSC: {
SmallVector<SDValue, 2> Results;
getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget,
Results);
return DAG.getMergeValues(Results, dl);
}
// Read Performance Monitoring Counters.
case RDPMC:
// Read Processor Register.
case RDPRU:
// GetExtended Control Register.
case XGETBV: {
SmallVector<SDValue, 2> Results;
// RDPMC uses ECX to select the index of the performance counter to read.
// RDPRU uses ECX to select the processor register to read.
// XGETBV uses ECX to select the index of the XCR register to return.
// The result is stored into registers EDX:EAX.
expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// XTEST intrinsics.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
SDValue Mask = Op.getOperand(4);
SDValue DataToTruncate = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
EVT MemVT = MemIntr->getMemoryVT();
uint16_t TruncationOp = IntrData->Opc0;
switch (TruncationOp) {
case X86ISD::VTRUNC: {
if (isAllOnesConstant(Mask)) // return just a truncate store
return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
MemIntr->getMemOperand());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDValue Offset = DAG.getUNDEF(VMask.getValueType());
return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask,
MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED,
true /* truncating */);
}
case X86ISD::VTRUNCUS:
case X86ISD::VTRUNCS: {
bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
if (isAllOnesConstant(Mask))
return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
MemIntr->getMemOperand(), DAG);
MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
VMask, MemVT, MemIntr->getMemOperand(), DAG);
}
default:
llvm_unreachable("Unsupported truncstore intrinsic");
}
}
}
}
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
unsigned Depth = Op.getConstantOperandVal(0);
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
MachinePointerInfo());
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
MachinePointerInfo());
}
SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
DAG.getMachineFunction().getFrameInfo().setReturnAddressIsTaken(true);
return getReturnAddressFrameIndex(DAG);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
EVT VT = Op.getValueType();
MFI.setFrameAddressIsTaken(true);
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
// Depth > 0 makes no sense on targets which use Windows unwind codes. It
// is not possible to crawl up the stack without looking at the unwind codes
// simultaneously.
int FrameAddrIndex = FuncInfo->getFAIndex();
if (!FrameAddrIndex) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
SlotSize, /*SPOffset=*/0, /*IsImmutable=*/false);
FuncInfo->setFAIndex(FrameAddrIndex);
}
return DAG.getFrameIndex(FrameAddrIndex, VT);
}
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = Op.getConstantOperandVal(0);
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
return FrameAddr;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
Register Reg = StringSwitch<unsigned>(RegName)
.Case("esp", X86::ESP)
.Case("rsp", X86::RSP)
.Case("ebp", X86::EBP)
.Case("rbp", X86::RBP)
.Default(0);
if (Reg == X86::EBP || Reg == X86::RBP) {
if (!TFI.hasFP(MF))
report_fatal_error("register " + StringRef(RegName) +
" is allocatable: function has no frame pointer");
#ifndef NDEBUG
else {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF);
assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
"Invalid Frame Register!");
}
#endif
}
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
}
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
}
Register X86TargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR)
return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX;
}
Register X86TargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
// Funclet personalities don't use selectors (the runtime does the selection).
if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)))
return X86::NoRegister;
return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
}
bool X86TargetLowering::needsFixedCatchObjects() const {
return Subtarget.isTargetWin64();
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
DAG.getIntPtrConstant(RegInfo->getSlotSize(),
dl));
StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain,
DAG.getRegister(StoreAddrReg, PtrVT));
}
SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
// If the subtarget is not 64-bit, we may need the global base register
// after the isel pseudo expansion, i.e., after the CGBR pass has run.
// Therefore, ask for the GlobalBaseReg now so that the pass inserts
// the code for us in case we need it. Otherwise, we would end up
// referencing a virtual register that is never defined!
if (!Subtarget.is64Bit()) {
const X86InstrInfo *TII = Subtarget.getInstrInfo();
(void)TII->getGlobalBaseReg(&DAG.getMachineFunction());
}
return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
DAG.getVTList(MVT::i32, MVT::Other),
Op.getOperand(0), Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
Op.getOperand(0), Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
Op.getOperand(0));
}
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
return Op.getOperand(0);
}
SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl(Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
if (Subtarget.is64Bit()) {
SDValue OutChains[6];
// Large code-model.
const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
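// Trampoline bytes produced by the stores below (23 bytes total):
//   +0: 49 BB <FPtr:8>   movabsq $fptr, %r11
//  +10: 49 BA <Nest:8>   movabsq $nest, %r10
//  +20: 49 FF E3         jmpq   *%r11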
// Load the pointer to the nested function into R11.
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, dl, MVT::i64));
OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
MachinePointerInfo(TrmpAddr, 2), Align(2));
// Load the 'nest' parameter value into R10.
// R10 is specified in X86CallingConv.td
OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, dl, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 10));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, dl, MVT::i64));
OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 12), Align(2));
// Jump to the nested function.
OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, dl, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 20));
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, dl, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8),
Addr, MachinePointerInfo(TrmpAddr, 22));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::ECX;
// Check that ECX wasn't needed by an 'inreg' parameter.
FunctionType *FTy = Func->getFunctionType();
const AttributeList &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
unsigned Idx = 0;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
if (Attrs.hasParamAttr(Idx, Attribute::InReg)) {
const DataLayout &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
}
if (InRegCount > 2) {
report_fatal_error("Nest register in use - reduce number of inreg"
" parameters!");
}
}
break;
}
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::Fast:
case CallingConv::Tail:
case CallingConv::SwiftTail:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::EAX;
break;
}
SDValue OutChains[4];
SDValue Addr, Disp;
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(10, dl, MVT::i32));
Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
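// The stores below assemble this 10-byte trampoline:
//   [0]    opcode for movl $imm32, %nestreg (0xB8 | register encoding)
//   [1..4] Nest (imm32)
//   [5]    0xE9 (jmp rel32)
//   [6..9] Disp = FPtr - (Trmp + 10), i.e. relative to the end of the jmp.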
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
OutChains[0] =
DAG.getStore(Root, dl, DAG.getConstant(MOV32ri | N86Reg, dl, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, dl, MVT::i32));
OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 1), Align(1));
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, dl, MVT::i32));
OutChains[2] =
DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), Addr,
MachinePointerInfo(TrmpAddr, 5), Align(1));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, dl, MVT::i32));
OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
MachinePointerInfo(TrmpAddr, 6), Align(1));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
}
SDValue X86TargetLowering::LowerGET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
/*
The rounding mode is in bits 11:10 of FPCW (the x87 FP control word), and has the following
settings:
00 Round to nearest
01 Round to -inf
10 Round to +inf
11 Round to 0
GET_ROUNDING, on the other hand, expects the following:
-1 Undefined
0 Round to 0
1 Round to nearest
2 Round to +inf
3 Round to -inf
To perform the conversion, we use a packed lookup table of the four 2-bit
values that we can index by FPCW[11:10]
0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPCW[11:10]
(0x2d >> ((FPCW & 0xc00) >> 9)) & 3
*/
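// For example, with FPCW[11:10] = 01 (round to -inf):
//   (FPCW & 0xc00) >> 9 == 0x400 >> 9 == 2
//   (0x2d >> 2) & 3 == 0b1011 & 3 == 3, the GET_ROUNDING value for "to -inf".
// Note the shift by 9 (rather than 10) scales the 2-bit RC field into a
// 2-bit step into the packed table.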
MachineFunction &MF = DAG.getMachineFunction();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo().CreateStackObject(2, Align(2), false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
SDValue Chain = Op.getOperand(0);
SDValue Ops[] = {Chain, StackSlot};
Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
DAG.getVTList(MVT::Other), Ops, MVT::i16, MPI,
Align(2), MachineMemOperand::MOStore);
// Load FP Control Word from stack slot
SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI, Align(2));
Chain = CWD.getValue(1);
// Mask and turn the control bits into a shift for the lookup table.
SDValue Shift =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0xc00, DL, MVT::i16)),
DAG.getConstant(9, DL, MVT::i8));
Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift);
SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
SDValue RetVal =
DAG.getNode(ISD::AND, DL, MVT::i32,
DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
DAG.getConstant(3, DL, MVT::i32));
RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);
return DAG.getMergeValues({RetVal, Chain}, DL);
}
SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
SDLoc DL(Op);
SDValue Chain = Op.getNode()->getOperand(0);
// FP control word may be set only from data in memory. So we need to allocate
// stack space to save/load FP control word.
int OldCWFrameIdx = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
SDValue StackSlot =
DAG.getFrameIndex(OldCWFrameIdx, getPointerTy(DAG.getDataLayout()));
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, OldCWFrameIdx);
MachineMemOperand *MMO =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 2, Align(2));
// Store FP control word into memory.
SDValue Ops[] = {Chain, StackSlot};
Chain = DAG.getMemIntrinsicNode(
X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO);
// Load FP Control Word from stack slot and clear RM field (bits 11:10).
SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI);
Chain = CWD.getValue(1);
CWD = DAG.getNode(ISD::AND, DL, MVT::i16, CWD.getValue(0),
DAG.getConstant(0xf3ff, DL, MVT::i16));
// Calculate new rounding mode.
SDValue NewRM = Op.getNode()->getOperand(1);
SDValue RMBits;
if (auto *CVal = dyn_cast<ConstantSDNode>(NewRM)) {
uint64_t RM = CVal->getZExtValue();
int FieldVal;
switch (static_cast<RoundingMode>(RM)) {
case RoundingMode::NearestTiesToEven: FieldVal = X86::rmToNearest; break;
case RoundingMode::TowardNegative: FieldVal = X86::rmDownward; break;
case RoundingMode::TowardPositive: FieldVal = X86::rmUpward; break;
case RoundingMode::TowardZero: FieldVal = X86::rmTowardZero; break;
default:
llvm_unreachable("rounding mode is not supported by X86 hardware");
}
RMBits = DAG.getConstant(FieldVal, DL, MVT::i16);
} else {
// Need to convert argument into bits of control word:
// 0 Round to 0 -> 11
// 1 Round to nearest -> 00
// 2 Round to +inf -> 10
// 3 Round to -inf -> 01
// The 2-bit value then needs to be shifted so that it occupies bits 11:10.
// To make the conversion, put all these values into a value 0xc9 and shift
// it left depending on the rounding mode:
// (0xc9 << 4) & 0xc00 = X86::rmTowardZero
// (0xc9 << 6) & 0xc00 = X86::rmToNearest
// ...
// (0xc9 << (2 * NewRM + 4)) & 0xc00
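// For example, NewRM = 3 (round to -inf):
//   shift = 2 * 3 + 4 = 10
//   (0xc9 << 10) & 0xc00 = 0x32400 & 0xc00 = 0x400, i.e. RC bits 11:10 = 01,
//   which is the x87 encoding of "round toward -inf" (X86::rmDownward).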
SDValue ShiftValue =
DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
DAG.getNode(ISD::ADD, DL, MVT::i32,
DAG.getNode(ISD::SHL, DL, MVT::i32, NewRM,
DAG.getConstant(1, DL, MVT::i8)),
DAG.getConstant(4, DL, MVT::i32)));
SDValue Shifted =
DAG.getNode(ISD::SHL, DL, MVT::i16, DAG.getConstant(0xc9, DL, MVT::i16),
ShiftValue);
RMBits = DAG.getNode(ISD::AND, DL, MVT::i16, Shifted,
DAG.getConstant(0xc00, DL, MVT::i16));
}
// Update rounding mode bits and store the new FP Control Word into stack.
CWD = DAG.getNode(ISD::OR, DL, MVT::i16, CWD, RMBits);
Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, Align(2));
// Load FP control word from the slot.
SDValue OpsLD[] = {Chain, StackSlot};
MachineMemOperand *MMOL =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 2, Align(2));
Chain = DAG.getMemIntrinsicNode(
X86ISD::FLDCW16m, DL, DAG.getVTList(MVT::Other), OpsLD, MVT::i16, MMOL);
// If target supports SSE, set MXCSR as well. Rounding mode is encoded in the
// same way but in bits 14:13.
if (Subtarget.hasSSE1()) {
// Store MXCSR into memory.
Chain = DAG.getNode(
ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
StackSlot);
// Load MXCSR from stack slot and clear RM field (bits 14:13).
SDValue CWD = DAG.getLoad(MVT::i32, DL, Chain, StackSlot, MPI);
Chain = CWD.getValue(1);
CWD = DAG.getNode(ISD::AND, DL, MVT::i32, CWD.getValue(0),
DAG.getConstant(0xffff9fff, DL, MVT::i32));
// Shift X87 RM bits from 11:10 to 14:13.
RMBits = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, RMBits);
RMBits = DAG.getNode(ISD::SHL, DL, MVT::i32, RMBits,
DAG.getConstant(3, DL, MVT::i8));
// Update rounding mode bits and store the new FP Control Word into stack.
CWD = DAG.getNode(ISD::OR, DL, MVT::i32, CWD, RMBits);
Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, Align(4));
// Load MXCSR from the slot.
Chain = DAG.getNode(
ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
StackSlot);
}
return Chain;
}
/// Lower a vector CTLZ using the natively supported vector CTLZ instruction.
//
// i8/i16 vectors are implemented using the dword LZCNT vector instruction
// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
// split the vector, perform the operation on its Lo and Hi parts and
// concatenate the results.
static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(Op.getOpcode() == ISD::CTLZ);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
"Unsupported element type");
// Split the vector; its Lo and Hi parts will be handled in the next iteration.
if (NumElems > 16 ||
(NumElems == 16 && !Subtarget.canExtendTo512DQ()))
return splitVectorIntUnary(Op, DAG);
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
"Unsupported value type for operation");
// Use the natively supported vector instruction vplzcntd.
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);
return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
}
// Lower CTLZ using a PSHUFB lookup table implementation.
static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
int NumElts = VT.getVectorNumElements();
int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8);
MVT CurrVT = MVT::getVectorVT(MVT::i8, NumBytes);
// Per-nibble leading zero PSHUFB lookup table.
const int LUT[16] = {/* 0 */ 4, /* 1 */ 3, /* 2 */ 2, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 1, /* 6 */ 1, /* 7 */ 1,
/* 8 */ 0, /* 9 */ 0, /* a */ 0, /* b */ 0,
/* c */ 0, /* d */ 0, /* e */ 0, /* f */ 0};
SmallVector<SDValue, 64> LUTVec;
for (int i = 0; i < NumBytes; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec);
// Begin by bitcasting the input to a byte vector, then split those bytes
// into lo/hi nibbles and use the PSHUFB LUT to perform CTLZ on each of them.
// If the hi input nibble is zero then we add both results together, otherwise
// we just take the hi result (by masking the lo result to zero before the
// add).
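// For example, byte 0x05: the hi nibble 0 is zero, so HiZ is all-ones and we
// add LUT[0] + LUT[5] = 4 + 1 = 5 = ctlz(0x05). Byte 0x1a: the hi nibble 1 is
// non-zero, the lo result is masked to zero, and we keep LUT[1] = 3 =
// ctlz(0x1a).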
SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
SDValue Zero = DAG.getConstant(0, DL, CurrVT);
SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
SDValue Lo = Op0;
SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
SDValue HiZ;
if (CurrVT.is512BitVector()) {
MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements());
HiZ = DAG.getSetCC(DL, MaskVT, Hi, Zero, ISD::SETEQ);
HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ);
} else {
HiZ = DAG.getSetCC(DL, CurrVT, Hi, Zero, ISD::SETEQ);
}
Lo = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Lo);
Hi = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Hi);
Lo = DAG.getNode(ISD::AND, DL, CurrVT, Lo, HiZ);
SDValue Res = DAG.getNode(ISD::ADD, DL, CurrVT, Lo, Hi);
// Merge the result back from vXi8 to VT, working on the lo/hi halves
// of the current vector width in the same way we did for the nibbles.
// If the upper half of the input element is zero then add the halves'
// leading zero counts together, otherwise just use the upper half's.
// Double the width of the result until we are at target width.
while (CurrVT != VT) {
int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits();
int CurrNumElts = CurrVT.getVectorNumElements();
MVT NextSVT = MVT::getIntegerVT(CurrScalarSizeInBits * 2);
MVT NextVT = MVT::getVectorVT(NextSVT, CurrNumElts / 2);
SDValue Shift = DAG.getConstant(CurrScalarSizeInBits, DL, NextVT);
// Check if the upper half of the input element is zero.
if (CurrVT.is512BitVector()) {
MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements());
HiZ = DAG.getSetCC(DL, MaskVT, DAG.getBitcast(CurrVT, Op0),
DAG.getBitcast(CurrVT, Zero), ISD::SETEQ);
HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ);
} else {
HiZ = DAG.getSetCC(DL, CurrVT, DAG.getBitcast(CurrVT, Op0),
DAG.getBitcast(CurrVT, Zero), ISD::SETEQ);
}
HiZ = DAG.getBitcast(NextVT, HiZ);
// Move the upper/lower halves to the lower bits as we'll be extending to
// NextVT. Mask the lower result to zero if HiZ is true and add the results
// together.
SDValue ResNext = Res = DAG.getBitcast(NextVT, Res);
SDValue R0 = DAG.getNode(ISD::SRL, DL, NextVT, ResNext, Shift);
SDValue R1 = DAG.getNode(ISD::SRL, DL, NextVT, HiZ, Shift);
R1 = DAG.getNode(ISD::AND, DL, NextVT, ResNext, R1);
Res = DAG.getNode(ISD::ADD, DL, NextVT, R0, R1);
CurrVT = NextVT;
}
return Res;
}
static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (Subtarget.hasCDI() &&
// vXi8 vectors need to be promoted to 512-bits for vXi32.
(Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8))
return LowerVectorCTLZ_AVX512CDI(Op, DAG, Subtarget);
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntUnary(Op, DAG);
// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
return splitVectorIntUnary(Op, DAG);
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
}
static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
unsigned Opc = Op.getOpcode();
if (VT.isVector())
return LowerVectorCTLZ(Op, dl, Subtarget, DAG);
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is no i8 bsr.
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
}
// Issue a bsr (scan bits in reverse) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
if (Opc == ISD::CTLZ) {
// If src is zero (i.e. bsr sets ZF), returns NumBits.
SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),
DAG.getTargetConstant(X86::COND_E, dl, MVT::i8),
Op.getValue(1)};
Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops);
}
// Finally xor with NumBits-1.
Op = DAG.getNode(ISD::XOR, dl, OpVT, Op,
DAG.getConstant(NumBits - 1, dl, OpVT));
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
return Op;
}
static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumBits = VT.getScalarSizeInBits();
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
"Only scalar CTTZ requires custom lowering");
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0);
// If src is known never zero we can skip the CMOV.
if (DAG.isKnownNeverZero(N0))
return Op;
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT),
DAG.getTargetConstant(X86::COND_E, dl, MVT::i8),
Op.getValue(1)};
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
}
static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16 || VT == MVT::i32)
return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
if (VT == MVT::v32i16 || VT == MVT::v64i8)
return splitVectorIntBinary(Op, DAG);
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return splitVectorIntBinary(Op, DAG);
}
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
SDLoc DL(Op);
if (VT == MVT::v32i16 || VT == MVT::v64i8 ||
(VT.is256BitVector() && !Subtarget.hasInt256())) {
assert(Op.getSimpleValueType().isInteger() &&
"Only handle AVX vector integer operation");
return splitVectorIntBinary(Op, DAG);
}
// Avoid the generic expansion with min/max if we don't have pminu*/pmaxu*.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT SetCCResultType =
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned BitWidth = VT.getScalarSizeInBits();
if (Opcode == ISD::USUBSAT) {
if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) {
// Handle a special-case with a bit-hack instead of cmp+select:
// usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
// If the target can use VPTERNLOG, DAGToDAG will match this as
// "vpsra + vpternlog" which is better than "vpmax + vpsub" with a
// "broadcast" constant load.
ConstantSDNode *C = isConstOrConstSplat(Y, true);
if (C && C->getAPIntValue().isSignMask()) {
SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
}
}
if (!TLI.isOperationLegal(ISD::UMAX, VT)) {
// usubsat X, Y --> (X >u Y) ? X - Y : 0
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
// TODO: Move this to DAGCombiner?
if (SetCCResultType == VT &&
DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
}
}
if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
(!VT.isVector() || VT == MVT::v2i64)) {
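// On overflow the wrapped SumDiff has the opposite sign of the infinitely
// precise result, so saturate to SatMax when SumDiff is negative and to
// SatMin otherwise; without overflow just return SumDiff.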
APInt MinVal = APInt::getSignedMinValue(BitWidth);
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Result =
DAG.getNode(Opcode == ISD::SADDSAT ? ISD::SADDO : ISD::SSUBO, DL,
DAG.getVTList(VT, SetCCResultType), X, Y);
SDValue SumDiff = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
SDValue SatMin = DAG.getConstant(MinVal, DL, VT);
SDValue SatMax = DAG.getConstant(MaxVal, DL, VT);
SDValue SumNeg =
DAG.getSetCC(DL, SetCCResultType, SumDiff, Zero, ISD::SETLT);
Result = DAG.getSelect(DL, VT, SumNeg, SatMax, SatMin);
return DAG.getSelect(DL, VT, Overflow, Result, SumDiff);
}
// Use default expansion.
return SDValue();
}
static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
// Since X86 does not have CMOV for 8-bit integer, we don't convert
// 8-bit integer abs to NEG and CMOV.
SDLoc DL(Op);
SDValue N0 = Op.getOperand(0);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
DAG.getConstant(0, DL, VT), N0);
SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_NS, DL, MVT::i8),
SDValue(Neg.getNode(), 1)};
return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
}
// ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X).
if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) {
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
SDValue Sub =
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Sub, Src);
}
if (VT.is256BitVector() && !Subtarget.hasInt256()) {
assert(VT.isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return splitVectorIntUnary(Op, DAG);
}
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
return splitVectorIntUnary(Op, DAG);
// Default to expand.
return SDValue();
}
static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// For AVX1 cases, split to use legal ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntBinary(Op, DAG);
if (VT == MVT::v32i16 || VT == MVT::v64i8)
return splitVectorIntBinary(Op, DAG);
// Default to expand.
return SDValue();
}
static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// For AVX1 cases, split to use legal ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntBinary(Op, DAG);
if (VT == MVT::v32i16 || VT == MVT::v64i8)
return splitVectorIntBinary(Op, DAG);
// Default to expand.
return SDValue();
}
static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntBinary(Op, DAG);
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
return splitVectorIntBinary(Op, DAG);
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
// Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16
// vector pairs, multiply and truncate.
if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
unsigned NumElts = VT.getVectorNumElements();
if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
(VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
return DAG.getNode(
ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::MUL, dl, ExVT,
DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, A),
DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, B)));
}
MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
// Extract the lo/hi parts and any-extend them to i16.
// We're going to mask off the low byte of each result element of the
// pmullw, so it doesn't matter what's in the high byte of each 16-bit
// element.
SDValue Undef = DAG.getUNDEF(VT);
SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));
SDValue BLo, BHi;
if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
// If the RHS is a constant, manually unpackl/unpackh.
SmallVector<SDValue, 16> LoOps, HiOps;
for (unsigned i = 0; i != NumElts; i += 16) {
for (unsigned j = 0; j != 8; ++j) {
LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
MVT::i16));
HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
MVT::i16));
}
}
BLo = DAG.getBuildVector(ExVT, dl, LoOps);
BHi = DAG.getBuildVector(ExVT, dl, HiOps);
} else {
BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
}
// Multiply, mask the lower 8 bits of the lo/hi results and pack.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
return getPack(DAG, Subtarget, dl, VT, RLo, RHi);
}
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
if (VT == MVT::v4i32) {
assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
"Should not custom lower when pmulld is available!");
// Extract the odd parts.
static const int UnpackMask[] = { 1, -1, 3, -1 };
SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
// Multiply the even parts.
SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64,
DAG.getBitcast(MVT::v2i64, A),
DAG.getBitcast(MVT::v2i64, B));
// Now multiply odd parts.
SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64,
DAG.getBitcast(MVT::v2i64, Aodds),
DAG.getBitcast(MVT::v2i64, Bodds));
Evens = DAG.getBitcast(VT, Evens);
Odds = DAG.getBitcast(VT, Odds);
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
static const int ShufMask[] = { 0, 4, 2, 6 };
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
"Only know how to lower V2I64/V4I64/V8I64 multiply");
assert(!Subtarget.hasDQI() && "DQI should use MULLQ");
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
// AloBlo = pmuludq(a, b);
// AloBhi = pmuludq(a, Bhi);
// AhiBlo = pmuludq(Ahi, b);
//
// Hi = psllqi(AloBhi + AhiBlo, 32);
// return AloBlo + Hi;
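//
// This follows from splitting each 64-bit operand into 32-bit halves:
//   a * b = (Alo + 2^32*Ahi) * (Blo + 2^32*Bhi)
//         = Alo*Blo + 2^32*(Alo*Bhi + Ahi*Blo)   (mod 2^64)
// since the Ahi*Bhi term is shifted entirely out of the low 64 bits.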
KnownBits AKnown = DAG.computeKnownBits(A);
KnownBits BKnown = DAG.computeKnownBits(B);
APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
bool ALoIsZero = LowerBitsMask.isSubsetOf(AKnown.Zero);
bool BLoIsZero = LowerBitsMask.isSubsetOf(BKnown.Zero);
APInt UpperBitsMask = APInt::getHighBitsSet(64, 32);
bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero);
bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero);
SDValue Zero = DAG.getConstant(0, dl, VT);
// Only multiply lo/hi halves that aren't known to be zero.
SDValue AloBlo = Zero;
if (!ALoIsZero && !BLoIsZero)
AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
SDValue AloBhi = Zero;
if (!ALoIsZero && !BHiIsZero) {
SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
}
SDValue AhiBlo = Zero;
if (!AHiIsZero && !BLoIsZero) {
SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
}
SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo);
Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG);
return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi);
}
static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
MVT VT, bool IsSigned,
const X86Subtarget &Subtarget,
SelectionDAG &DAG,
SDValue *Low = nullptr) {
unsigned NumElts = VT.getVectorNumElements();
// For vXi8 we will unpack the low and high half of each 128 bit lane to widen
// to a vXi16 type. Do the multiplies, shift the results and pack the half
// lane results back together.
// We'll take different approaches for signed and unsigned.
// For unsigned we'll use punpcklbw/punpckhbw to zero extend the bytes
// and use pmullw to calculate the full 16-bit product.
// For signed we'll use punpcklbw/punpckhbw to extend the bytes to words and
// shift them left into the upper byte of each word. This allows us to use
// pmulhw to calculate the full 16-bit product. This trick means we don't
// need to sign extend the bytes to use pmullw.
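// In other words, for each byte pair:
//   pmulhw((a << 8), (b << 8)) == ((a * b) << 16) >> 16 == a * b (16-bit),
// so the sign extension happens implicitly through the shifted operands.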
MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue ALo, AHi;
if (IsSigned) {
ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A));
AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A));
} else {
ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero));
AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero));
}
SDValue BLo, BHi;
if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
// If the RHS is a constant, manually unpackl/unpackh and extend.
SmallVector<SDValue, 16> LoOps, HiOps;
for (unsigned i = 0; i != NumElts; i += 16) {
for (unsigned j = 0; j != 8; ++j) {
SDValue LoOp = B.getOperand(i + j);
SDValue HiOp = B.getOperand(i + j + 8);
if (IsSigned) {
LoOp = DAG.getAnyExtOrTrunc(LoOp, dl, MVT::i16);
HiOp = DAG.getAnyExtOrTrunc(HiOp, dl, MVT::i16);
LoOp = DAG.getNode(ISD::SHL, dl, MVT::i16, LoOp,
DAG.getConstant(8, dl, MVT::i16));
HiOp = DAG.getNode(ISD::SHL, dl, MVT::i16, HiOp,
DAG.getConstant(8, dl, MVT::i16));
} else {
LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
}
LoOps.push_back(LoOp);
HiOps.push_back(HiOp);
}
}
BLo = DAG.getBuildVector(ExVT, dl, LoOps);
BHi = DAG.getBuildVector(ExVT, dl, HiOps);
} else if (IsSigned) {
BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B));
BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B));
} else {
BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero));
BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero));
}
// Multiply, lshr the upper 8 bits to the lower 8 bits of the lo/hi results and
// pack back to vXi8.
unsigned MulOpc = IsSigned ? ISD::MULHS : ISD::MUL;
SDValue RLo = DAG.getNode(MulOpc, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(MulOpc, dl, ExVT, AHi, BHi);
if (Low)
*Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi);
return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true);
}
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
bool IsSigned = Op->getOpcode() == ISD::MULHS;
unsigned NumElts = VT.getVectorNumElements();
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
// Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntBinary(Op, DAG);
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
return splitVectorIntBinary(Op, DAG);
if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
(VT == MVT::v8i32 && Subtarget.hasInt256()) ||
(VT == MVT::v16i32 && Subtarget.hasAVX512()));
// PMULxD operations multiply each even value (starting at 0) of LHS with
// the related value of RHS and produce a widened result.
// E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
//
// In other words, to have all the results, we need to perform two PMULxD:
// 1. one with the even values.
// 2. one with the odd values.
// To achieve #2, we need to place the odd values at an even position.
//
// Place the odd value at an even position (basically, shift all values 1
// step to the left):
const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
9, -1, 11, -1, 13, -1, 15, -1};
// <a|b|c|d> => <b|undef|d|undef>
SDValue Odd0 =
DAG.getVectorShuffle(VT, dl, A, A, ArrayRef(&Mask[0], NumElts));
// <e|f|g|h> => <f|undef|h|undef>
SDValue Odd1 =
DAG.getVectorShuffle(VT, dl, B, B, ArrayRef(&Mask[0], NumElts));
// Emit two multiplies, one for the lower 2 ints and one for the higher 2
// ints.
MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
unsigned Opcode =
(IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ;
// PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
DAG.getBitcast(MulVT, A),
DAG.getBitcast(MulVT, B)));
// PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
// => <2 x i64> <bf|dh>
SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
DAG.getBitcast(MulVT, Odd0),
DAG.getBitcast(MulVT, Odd1)));
// Shuffle it back into the right order.
SmallVector<int, 16> ShufMask(NumElts);
for (int i = 0; i != (int)NumElts; ++i)
ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
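// For v4i32 this produces the mask {1, 5, 3, 7}, i.e. the high halves of
// <ae|cg> and <bf|dh> interleaved back into <hi(ae)|hi(bf)|hi(cg)|hi(dh)>.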
SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);
// If we have a signed multiply but no PMULDQ fix up the result of an
// unsigned multiply.
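// The correction uses the identity
//   mulhs(a, b) = mulhu(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0),
// computed below with compare-and-AND since pre-SSE4.1 lacks PMULDQ.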
if (IsSigned && !Subtarget.hasSSE41()) {
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);
SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
}
return Res;
}
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) &&
"Unsupported vector type");
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
// logical shift down the upper half and pack back to i8.
// With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
// and then ashr/lshr the upper bits down to the lower bits before multiply.
if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
(VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
Mul = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
}
return LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG);
}
// Custom lowering for SMULO/UMULO.
static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// Scalars defer to LowerXALUO.
if (!VT.isVector())
return LowerXALUO(Op, DAG);
SDLoc dl(Op);
bool IsSigned = Op->getOpcode() == ISD::SMULO;
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
EVT OvfVT = Op->getValueType(1);
if ((VT == MVT::v32i8 && !Subtarget.hasInt256()) ||
(VT == MVT::v64i8 && !Subtarget.hasBWI())) {
// Extract the LHS Lo/Hi vectors
SDValue LHSLo, LHSHi;
std::tie(LHSLo, LHSHi) = splitVector(A, DAG, dl);
// Extract the RHS Lo/Hi vectors
SDValue RHSLo, RHSHi;
std::tie(RHSLo, RHSHi) = splitVector(B, DAG, dl);
EVT LoOvfVT, HiOvfVT;
std::tie(LoOvfVT, HiOvfVT) = DAG.GetSplitDestVTs(OvfVT);
SDVTList LoVTs = DAG.getVTList(LHSLo.getValueType(), LoOvfVT);
SDVTList HiVTs = DAG.getVTList(LHSHi.getValueType(), HiOvfVT);
// Issue the split operations.
SDValue Lo = DAG.getNode(Op.getOpcode(), dl, LoVTs, LHSLo, RHSLo);
SDValue Hi = DAG.getNode(Op.getOpcode(), dl, HiVTs, LHSHi, RHSHi);
// Join the separate data results and the overflow results.
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
SDValue Ovf = DAG.getNode(ISD::CONCAT_VECTORS, dl, OvfVT, Lo.getValue(1),
Hi.getValue(1));
return DAG.getMergeValues({Res, Ovf}, dl);
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT SetccVT =
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
(VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
unsigned NumElts = VT.getVectorNumElements();
MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
SDValue Low = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
SDValue Ovf;
if (IsSigned) {
SDValue High, LowSign;
if (OvfVT.getVectorElementType() == MVT::i1 &&
(Subtarget.hasBWI() || Subtarget.canExtendTo512DQ())) {
// Rather than truncating, try to do the compare on vXi16 or vXi32.
// Shift the high down filling with sign bits.
High = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Mul, 8, DAG);
// Fill all 16 bits with the sign bit from the low.
LowSign =
getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExVT, Mul, 8, DAG);
LowSign = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, LowSign,
15, DAG);
SetccVT = OvfVT;
if (!Subtarget.hasBWI()) {
// We can't do a vXi16 compare so sign extend to v16i32.
High = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, High);
LowSign = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, LowSign);
}
} else {
// Otherwise do the compare at vXi8.
High = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
High = DAG.getNode(ISD::TRUNCATE, dl, VT, High);
LowSign =
DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT));
}
Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE);
} else {
SDValue High =
getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
if (OvfVT.getVectorElementType() == MVT::i1 &&
(Subtarget.hasBWI() || Subtarget.canExtendTo512DQ())) {
// Rather than truncating, try to do the compare on vXi16 or vXi32.
SetccVT = OvfVT;
if (!Subtarget.hasBWI()) {
// We can't do a vXi16 compare so sign extend to v16i32.
High = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, High);
}
} else {
// Otherwise do the compare at vXi8.
High = DAG.getNode(ISD::TRUNCATE, dl, VT, High);
}
Ovf =
DAG.getSetCC(dl, SetccVT, High,
DAG.getConstant(0, dl, High.getValueType()), ISD::SETNE);
}
Ovf = DAG.getSExtOrTrunc(Ovf, dl, OvfVT);
return DAG.getMergeValues({Low, Ovf}, dl);
}
SDValue Low;
SDValue High =
LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG, &Low);
SDValue Ovf;
if (IsSigned) {
// SMULO overflows if the high bits don't match the sign of the low.
SDValue LowSign =
DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT));
Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE);
} else {
// UMULO overflows if the high bits are non-zero.
Ovf =
DAG.getSetCC(dl, SetccVT, High, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
Ovf = DAG.getSExtOrTrunc(Ovf, dl, OvfVT);
return DAG.getMergeValues({Low, Ovf}, dl);
}
SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget.isTargetWin64() && "Unexpected target");
EVT VT = Op.getValueType();
assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
"Unexpected return type for lowering");
if (isa<ConstantSDNode>(Op->getOperand(1))) {
SmallVector<SDValue> Result;
if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i64, DAG))
return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), VT, Result[0], Result[1]);
}
RTLIB::Libcall LC;
bool isSigned;
switch (Op->getOpcode()) {
default: llvm_unreachable("Unexpected request for libcall!");
case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break;
case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break;
case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break;
case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break;
}
SDLoc dl(Op);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
EVT ArgVT = Op->getOperand(i).getValueType();
assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
"Unexpected argument type for lowering");
SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
Entry.Node = StackPtr;
InChain =
DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MPI, Align(16));
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Ty = PointerType::get(ArgTy, 0);
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(InChain)
.setLibCallee(
getLibcallCallingConv(LC),
static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), Callee,
std::move(Args))
.setInRegister()
.setSExtResult(isSigned)
.setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return DAG.getBitcast(VT, CallInfo.first);
}
SDValue X86TargetLowering::LowerWin64_FP_TO_INT128(SDValue Op,
SelectionDAG &DAG,
SDValue &Chain) const {
assert(Subtarget.isTargetWin64() && "Unexpected target");
EVT VT = Op.getValueType();
bool IsStrict = Op->isStrictFPOpcode();
SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
EVT ArgVT = Arg.getValueType();
assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
"Unexpected return type for lowering");
RTLIB::Libcall LC;
if (Op->getOpcode() == ISD::FP_TO_SINT ||
Op->getOpcode() == ISD::STRICT_FP_TO_SINT)
LC = RTLIB::getFPTOSINT(ArgVT, VT);
else
LC = RTLIB::getFPTOUINT(ArgVT, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
SDLoc dl(Op);
MakeLibCallOptions CallOptions;
Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
SDValue Result;
// Expect the i128 result to be returned as a v2i64 in xmm0; cast it back to
// the expected VT (i128).
std::tie(Result, Chain) =
makeLibCall(DAG, LC, MVT::v2i64, Arg, CallOptions, dl, Chain);
Result = DAG.getBitcast(VT, Result);
return Result;
}
SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget.isTargetWin64() && "Unexpected target");
EVT VT = Op.getValueType();
bool IsStrict = Op->isStrictFPOpcode();
SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
EVT ArgVT = Arg.getValueType();
assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
"Unexpected argument type for lowering");
RTLIB::Libcall LC;
if (Op->getOpcode() == ISD::SINT_TO_FP ||
Op->getOpcode() == ISD::STRICT_SINT_TO_FP)
LC = RTLIB::getSINTTOFP(ArgVT, VT);
else
LC = RTLIB::getUINTTOFP(ArgVT, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
SDLoc dl(Op);
MakeLibCallOptions CallOptions;
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
// Pass the i128 argument as an indirect argument on the stack.
SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
Chain = DAG.getStore(Chain, dl, Arg, StackPtr, MPI, Align(16));
SDValue Result;
std::tie(Result, Chain) =
makeLibCall(DAG, LC, VT, StackPtr, CallOptions, dl, Chain);
return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
}
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool supportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))
return false;
if (VT.getScalarSizeInBits() < 16)
return false;
if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
(VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||
(VT.is256BitVector() && Subtarget.hasInt256());
bool AShift = LShift && (Subtarget.hasAVX512() ||
(VT != MVT::v2i64 && VT != MVT::v4i64));
return (Opcode == ISD::SRA) ? AShift : LShift;
}
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
static
bool supportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
return supportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
static bool supportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))
return false;
if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
return false;
// vXi16 supported only on AVX-512, BWI
if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
return false;
if (Subtarget.hasAVX512() &&
(Subtarget.useAVX512Regs() || !VT.is512BitVector()))
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
return (Opcode == ISD::SRA) ? AShift : LShift;
}
static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);
auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
SDValue Ex = DAG.getBitcast(ExVT, R);
// ashr(R, 63) === cmp_slt(R, 0)
if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
"Unsupported PCMPGT op");
return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
}
if (ShiftAmt >= 32) {
// Splat sign to upper i32 dst, and SRA upper i32 src to lower i32.
SDValue Upper =
getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG);
SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt - 32, DAG);
if (VT == MVT::v2i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3});
if (VT == MVT::v4i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{9, 1, 11, 3, 13, 5, 15, 7});
} else {
// SRA upper i32, SRL whole i64 and select lower i32.
SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt, DAG);
SDValue Lower =
getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);
Lower = DAG.getBitcast(ExVT, Lower);
if (VT == MVT::v2i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3});
if (VT == MVT::v4i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{8, 1, 10, 3, 12, 5, 14, 7});
}
return DAG.getBitcast(VT, Ex);
};
// Optimize shl/srl/sra with constant shift amount.
APInt APIntShiftAmt;
if (!X86::isConstantSplat(Amt, APIntShiftAmt))
return SDValue();
// If the shift amount is out of range, return undef.
if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
return DAG.getUNDEF(VT);
uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) {
// Hardware support for vector shifts is sparse, which makes us scalarize the
// vector operations in many cases. Also, on Sandy Bridge ADD is faster than
// shl: (shl V, 1) -> (add (freeze V), (freeze V))
if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) {
// R may be undef at run-time, but (shl R, 1) must be an even number (LSB
// must be 0). (add undef, undef) however can be any value. To make this
// safe, we must freeze R to ensure that register allocation uses the same
// register for an undefined value. This ensures that the result will
// still be even and preserves the original semantics.
R = DAG.getFreeze(R);
return DAG.getNode(ISD::ADD, dl, VT, R, R);
}
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
}
// i64 SRA needs to be performed as partial shifts.
if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
(Subtarget.hasInt256() && VT == MVT::v4i64)) &&
Op.getOpcode() == ISD::SRA)
return ArithmeticShiftRight64(ShiftAmt);
if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
(Subtarget.hasBWI() && VT == MVT::v64i8)) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
// Simple i8 add case
if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) {
// R may be undef at run-time, but (shl R, 1) must be an even number (LSB
// must be 0). (add undef, undef) however can be any value. To make this
// safe, we must freeze R to ensure that register allocation uses the same
// register for an undefined value. This ensures that the result will
// still be even and preserves the original semantics.
R = DAG.getFreeze(R);
return DAG.getNode(ISD::ADD, dl, VT, R, R);
}
// ashr(R, 7) === cmp_slt(R, 0)
if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
SDValue Zeros = DAG.getConstant(0, dl, VT);
if (VT.is512BitVector()) {
assert(VT == MVT::v64i8 && "Unexpected element type!");
SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
}
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
// XOP can shift v16i8 directly instead of as shift v8i16 + mask.
if (VT == MVT::v16i8 && Subtarget.hasXOP())
return SDValue();
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
ShiftAmt, DAG);
SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
APInt Mask = APInt::getHighBitsSet(8, 8 - ShiftAmt);
return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(Mask, dl, VT));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
ShiftAmt, DAG);
SRL = DAG.getBitcast(VT, SRL);
// Zero out the leftmost bits.
APInt Mask = APInt::getLowBitsSet(8, 8 - ShiftAmt);
return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getConstant(Mask, dl, VT));
}
if (Op.getOpcode() == ISD::SRA) {
// ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
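// e.g. for i8 with Amt = 4: Mask = 0x08; R = 0xf0 (-16) gives
// lshr = 0x0f, xor = 0x07, sub = 0xff (-1) == ashr(-16, 4), while
// R = 0x70 (+112) gives lshr = 0x07, xor = 0x0f, sub = 0x07 == ashr(112, 4).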
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
llvm_unreachable("Unknown shift opcode.");
}
return SDValue();
}
static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
int BaseShAmtIdx = -1;
if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) {
if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode))
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx,
Subtarget, DAG);
// vXi8 shifts - shift as v8i16 + mask result.
if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
(VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
VT == MVT::v64i8) &&
!Subtarget.hasXOP()) {
unsigned NumElts = VT.getVectorNumElements();
MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
// Create the mask using vXi16 shifts. For shift-rights we need to move
// the upper byte down before splatting the vXi8 mask.
SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
BaseShAmt, BaseShAmtIdx, Subtarget, DAG);
if (Opcode != ISD::SHL)
BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
8, DAG);
BitMask = DAG.getBitcast(VT, BitMask);
BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
SmallVector<int, 64>(NumElts, 0));
SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
DAG.getBitcast(ExtVT, R), BaseShAmt,
BaseShAmtIdx, Subtarget, DAG);
Res = DAG.getBitcast(VT, Res);
Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);
if (Opcode == ISD::SRA) {
// ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
// SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
SignMask =
getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask, BaseShAmt,
BaseShAmtIdx, Subtarget, DAG);
SignMask = DAG.getBitcast(VT, SignMask);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
}
return Res;
}
}
}
return SDValue();
}
// Convert a shift/rotate left amount to a multiplication scale factor.
static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Amt.getSimpleValueType();
if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget.hasInt256() && VT == MVT::v16i16) ||
(Subtarget.hasAVX512() && VT == MVT::v32i16) ||
(!Subtarget.hasAVX512() && VT == MVT::v16i8) ||
(Subtarget.hasInt256() && VT == MVT::v32i8) ||
(Subtarget.hasBWI() && VT == MVT::v64i8)))
return SDValue();
MVT SVT = VT.getVectorElementType();
unsigned SVTBits = SVT.getSizeInBits();
unsigned NumElems = VT.getVectorNumElements();
APInt UndefElts;
SmallVector<APInt> EltBits;
if (getTargetConstantBitsFromNode(Amt, SVTBits, UndefElts, EltBits)) {
APInt One(SVTBits, 1);
SmallVector<SDValue> Elts(NumElems, DAG.getUNDEF(SVT));
for (unsigned I = 0; I != NumElems; ++I) {
if (UndefElts[I] || EltBits[I].uge(SVTBits))
continue;
uint64_t ShAmt = EltBits[I].getZExtValue();
Elts[I] = DAG.getConstant(One.shl(ShAmt), dl, SVT);
}
return DAG.getBuildVector(VT, dl, Elts);
}
// If the target doesn't support variable shifts, use either FP conversion
// or integer multiplication to avoid shifting each element individually.
if (VT == MVT::v4i32) {
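// Build the IEEE-754 single-precision value 2^Amt directly: place Amt in the
// exponent field (bits 30:23), add the biased encoding of 1.0f (0x3f800000),
// then convert back to integer to recover (1 << Amt) per lane.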
Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt,
DAG.getConstant(0x3f800000U, dl, VT));
Amt = DAG.getBitcast(MVT::v4f32, Amt);
return DAG.getNode(ISD::FP_TO_SINT, dl, VT, Amt);
}
// AVX2 can more effectively perform this as a zext/trunc to/from v8i32.
if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {
SDValue Z = DAG.getConstant(0, dl, VT);
SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG);
Hi = convertShiftLeftToScale(Hi, dl, Subtarget, DAG);
if (Subtarget.hasSSE41())
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
return getPack(DAG, Subtarget, dl, VT, Lo, Hi);
}
return SDValue();
}
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
unsigned Opc = Op.getOpcode();
unsigned X86OpcV = getTargetVShiftUniformOpcode(Opc, true);
unsigned X86OpcI = getTargetVShiftUniformOpcode(Opc, false);
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
if (SDValue V = LowerShiftByScalarImmediate(Op, DAG, Subtarget))
return V;
if (SDValue V = LowerShiftByScalarVariable(Op, DAG, Subtarget))
return V;
if (supportedVectorVarShift(VT, Subtarget, Opc))
return Op;
// i64 vector arithmetic shift can be emulated with the transform:
// M = lshr(SIGN_MASK, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
if (((VT == MVT::v2i64 && !Subtarget.hasXOP()) ||
(VT == MVT::v4i64 && Subtarget.hasInt256())) &&
Opc == ISD::SRA) {
SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
R = DAG.getNode(ISD::XOR, dl, VT, R, M);
R = DAG.getNode(ISD::SUB, dl, VT, R, M);
return R;
}
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v8i16 || VT == MVT::v16i8)) {
if (Opc == ISD::SRL || Opc == ISD::SRA) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
}
if (Opc == ISD::SHL || Opc == ISD::SRL)
return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
if (Opc == ISD::SRA)
return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
}
// v2i64 vector logical shifts can efficiently avoid scalarization - do the
// shifts per-lane and then shuffle the partial results back together.
if (VT == MVT::v2i64 && Opc != ISD::SRA) {
// Splat the shift amounts so the scalar shifts above will catch it.
SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
// If possible, lower this shift as a sequence of two shifts by
// constant plus a BLENDing shuffle instead of scalarizing it.
// Example:
// (v4i32 (srl A, (build_vector < X, Y, Y, Y>)))
//
// Could be rewritten as:
// (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>)))
//
// The advantage is that the two shifts from the example would be
// lowered as X86ISD::VSRLI nodes in parallel before blending.
if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
(VT == MVT::v16i16 && Subtarget.hasInt256()))) {
SDValue Amt1, Amt2;
unsigned NumElts = VT.getVectorNumElements();
SmallVector<int, 8> ShuffleMask;
for (unsigned i = 0; i != NumElts; ++i) {
SDValue A = Amt->getOperand(i);
if (A.isUndef()) {
ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
if (!Amt1 || Amt1 == A) {
ShuffleMask.push_back(i);
Amt1 = A;
continue;
}
if (!Amt2 || Amt2 == A) {
ShuffleMask.push_back(i + NumElts);
Amt2 = A;
continue;
}
break;
}
// Only perform this blend if we can perform it without loading a mask.
if (ShuffleMask.size() == NumElts && Amt1 && Amt2 &&
(VT != MVT::v16i16 ||
is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&
(VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL ||
canWidenShuffleElements(ShuffleMask))) {
auto *Cst1 = dyn_cast<ConstantSDNode>(Amt1);
auto *Cst2 = dyn_cast<ConstantSDNode>(Amt2);
if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) &&
Cst2->getAPIntValue().ult(EltSizeInBits)) {
SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
Cst1->getZExtValue(), DAG);
SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
Cst2->getZExtValue(), DAG);
return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
}
}
}
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// For v32i8 cases, it might be quicker to split/extend to vXi16 shifts.
if (Opc == ISD::SHL && !(VT == MVT::v32i8 && (Subtarget.hasXOP() ||
Subtarget.canExtendTo512BW())))
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
// Constant ISD::SRL can be performed efficiently on vXi16 vectors as it can
// be replaced with ISD::MULHU, creating a scale factor from (NumEltBits - Amt).
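// e.g. for i16: srl(X, C) == mulhu(X, 1 << (16 - C)) when C != 0; the C == 0
// lanes are handled by the select below.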
if (Opc == ISD::SRL && ConstantAmt &&
(VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) {
SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ);
SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale);
return DAG.getSelect(dl, VT, ZAmt, R, Res);
}
}
// Constant ISD::SRA can be performed efficiently on vXi16 vectors as it can
// be replaced with ISD::MULHS, creating a scale factor from (NumEltBits - Amt).
// TODO: Special case handling for shift by 0/1, really we can afford either
// of these cases in pre-SSE41/XOP/AVX512 but not both.
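// For i16: sra(X, C) == mulhs(X, 1 << (16 - C)) for C in [2,15]; the scale
// factors for C == 0 and C == 1 (1 << 16 and 1 << 15) don't fit in a positive
// i16, so those lanes are patched up with the selects below.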
if (Opc == ISD::SRA && ConstantAmt &&
(VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) &&
((Subtarget.hasSSE41() && !Subtarget.hasXOP() &&
!Subtarget.hasAVX512()) ||
DAG.isKnownNeverZero(Amt))) {
SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
SDValue Amt0 =
DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ);
SDValue Amt1 =
DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ);
SDValue Sra1 =
getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG);
SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale);
Res = DAG.getSelect(dl, VT, Amt0, R, Res);
return DAG.getSelect(dl, VT, Amt1, Sra1, Res);
}
}
// v4i32 Non-Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
// and shift using the SSE2 variable shifts.
// The separate results can then be blended together.
if (VT == MVT::v4i32) {
SDValue Amt0, Amt1, Amt2, Amt3;
if (ConstantAmt) {
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
} else {
// The SSE2 shifts use the lower i64 as the same shift amount for
// all lanes and the upper i64 is ignored. On AVX we're better off
// just zero-extending, but for SSE just duplicating the top 16-bits is
// cheaper and has the same effect for out of range values.
if (Subtarget.hasAVX()) {
SDValue Z = DAG.getConstant(0, dl, VT);
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});
} else {
SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt);
SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
{4, 5, 6, 7, -1, -1, -1, -1});
SDValue Msk02 = getV4X86ShuffleImm8ForMask({0, 1, 1, 1}, dl, DAG);
SDValue Msk13 = getV4X86ShuffleImm8ForMask({2, 3, 3, 3}, dl, DAG);
Amt0 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk02);
Amt1 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk13);
Amt2 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk02);
Amt3 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk13);
}
}
unsigned ShOpc = ConstantAmt ? Opc : X86OpcV;
SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0));
SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1));
SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2));
SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3));
// Merge the shifted lane results optimally with/without PBLENDW.
// TODO - ideally shuffle combining would handle this.
if (Subtarget.hasSSE41()) {
SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
}
SDValue R01 = DAG.getVectorShuffle(VT, dl, R0, R1, {0, -1, -1, 5});
SDValue R23 = DAG.getVectorShuffle(VT, dl, R2, R3, {2, -1, -1, 7});
return DAG.getVectorShuffle(VT, dl, R01, R23, {0, 3, 4, 7});
}
// It's worth extending once and using the vXi16/vXi32 shifts for smaller
// types, but without AVX512 the extra overhead to get from vXi8 to vXi32
// makes the existing SSE solution better.
// NOTE: We honor the preferred vector width before promoting to 512 bits.
if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
(Subtarget.canExtendTo512DQ() && VT == MVT::v16i16) ||
(Subtarget.canExtendTo512DQ() && VT == MVT::v16i8) ||
(Subtarget.canExtendTo512BW() && VT == MVT::v32i8) ||
(Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) {
assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) &&
"Unexpected vector type");
MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Opc, dl, ExtVT, R, Amt));
}
// Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors as we
// extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI.
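// e.g. for i8: srl(X, C) is the high byte of (zext16(X) * (1 << (8 - C))),
// and sra(X, C) the high byte of (sext16(X) * (1 << (8 - C))).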
if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) &&
(VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) &&
!Subtarget.hasXOP()) {
int NumElts = VT.getVectorNumElements();
SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8);
// Extend constant shift amount to vXi16 (it doesn't matter if the type
// isn't legal).
MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT);
Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt);
Amt = DAG.getNode(ISD::SHL, dl, ExVT, DAG.getConstant(1, dl, ExVT), Amt);
assert(ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()) &&
"Constant build vector expected");
if (VT == MVT::v16i8 && Subtarget.hasInt256()) {
R = Opc == ISD::SRA ? DAG.getSExtOrTrunc(R, dl, ExVT)
: DAG.getZExtOrTrunc(R, dl, ExVT);
R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt);
R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8);
return DAG.getZExtOrTrunc(R, dl, VT);
}
SmallVector<SDValue, 16> LoAmt, HiAmt;
for (int i = 0; i != NumElts; i += 16) {
for (int j = 0; j != 8; ++j) {
LoAmt.push_back(Amt.getOperand(i + j));
HiAmt.push_back(Amt.getOperand(i + j + 8));
}
}
MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2);
SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt);
SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt);
SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R));
SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R));
LoR = DAG.getNode(X86OpcI, dl, VT16, LoR, Cst8);
HiR = DAG.getNode(X86OpcI, dl, VT16, HiR, Cst8);
LoR = DAG.getNode(ISD::MUL, dl, VT16, LoR, LoA);
HiR = DAG.getNode(ISD::MUL, dl, VT16, HiR, HiA);
LoR = DAG.getNode(X86ISD::VSRLI, dl, VT16, LoR, Cst8);
HiR = DAG.getNode(X86ISD::VSRLI, dl, VT16, HiR, Cst8);
return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
}
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (VT.is512BitVector()) {
// On AVX512BW targets we make use of the fact that VSELECT lowers
// to a masked blend which selects bytes based just on the sign bit
// extracted to a mask.
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
Sel = DAG.getSetCC(dl, MaskVT, DAG.getConstant(0, dl, VT), Sel,
ISD::SETGT);
return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
} else if (Subtarget.hasSSE41()) {
// On SSE41 targets we can use PBLENDVB which selects bytes based just
// on the sign bit.
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
return DAG.getBitcast(SelVT,
DAG.getNode(X86ISD::BLENDV, dl, VT, Sel, V0, V1));
}
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue Z = DAG.getConstant(0, dl, SelVT);
SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
return DAG.getSelect(dl, SelVT, C, V0, V1);
};
// Turn 'a' into a mask suitable for VSELECT: a = a << 5;
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
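// Each amount bit, moved into the sign-bit position in turn, then selects
// whether an extra shift by 4, 2 and finally 1 is applied.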
Amt = DAG.getBitcast(ExtVT, Amt);
Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG);
Amt = DAG.getBitcast(VT, Amt);
if (Opc == ISD::SHL || Opc == ISD::SRL) {
// r = VSELECT(r, shift(r, 4), a);
SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
return R;
}
if (Opc == ISD::SRA) {
// For SRA we need to unpack each byte to the higher byte of an i16 vector
// so we can correctly sign extend. We don't care what happens to the
// lower byte.
SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R);
SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
// r = VSELECT(r, shift(r, 4), a);
SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG);
SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// a += a
ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 2), a);
MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 2, DAG);
MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 2, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// a += a
ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 1), a);
MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 1, DAG);
MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 1, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// Logical shift the result back to the lower byte, leaving a zero upper
// byte meaning that we can safely pack with PACKUSWB.
RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG);
RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
}
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
SDValue Z = DAG.getConstant(0, dl, VT);
SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z);
SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z);
SDValue RLo = getUnpackl(DAG, dl, VT, Z, R);
SDValue RHi = getUnpackh(DAG, dl, VT, Z, R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo);
SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi);
Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG);
Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
if (VT == MVT::v8i16) {
// If we have a constant shift amount, the non-SSE41 path is best as avoiding
// bitcasts makes it easier to constant fold and reduce to PBLENDW.
bool UseSSE41 = Subtarget.hasSSE41() &&
!ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
// On SSE41 targets we can use PBLENDVB which selects bytes based just on
// the sign bit.
if (UseSSE41) {
MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
V0 = DAG.getBitcast(ExtVT, V0);
V1 = DAG.getBitcast(ExtVT, V1);
Sel = DAG.getBitcast(ExtVT, Sel);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDV, dl, ExtVT, Sel, V0, V1));
}
// On pre-SSE41 targets we splat the sign bit - a negative value will
// set all bits of the lanes to true and VSELECT uses that in
// its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue C =
getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG);
return DAG.getSelect(dl, VT, C, V0, V1);
};
// Turn 'a' into a mask suitable for VSELECT: a = a << 12;
if (UseSSE41) {
// On SSE41 targets we need to replicate the shift mask in both
// bytes for PBLENDVB.
Amt = DAG.getNode(
ISD::OR, dl, VT,
getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG),
getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG));
} else {
Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG);
}
// r = VSELECT(r, shift(r, 8), a);
SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 4), a);
M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG);
R = SignBitSelect(Amt, M, R);
return R;
}
// Decompose 256-bit shifts into 128-bit shifts.
if (VT.is256BitVector())
return splitVectorIntBinary(Op, DAG);
if (VT == MVT::v32i16 || VT == MVT::v64i8)
return splitVectorIntBinary(Op, DAG);
return SDValue();
}
static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
"Unexpected funnel shift opcode!");
SDLoc DL(Op);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool IsFSHR = Op.getOpcode() == ISD::FSHR;
if (VT.isVector()) {
APInt APIntShiftAmt;
bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
if (IsFSHR)
std::swap(Op0, Op1);
if (IsCstSplat) {
uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
{Op0, Op1, Imm}, DAG, Subtarget);
}
return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
{Op0, Op1, Amt}, DAG, Subtarget);
}
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
"Unexpected funnel shift type!");
// fshl(x,y,z) -> (unpack(y,x) << (z & (bw-1))) >> bw.
// fshr(x,y,z) -> unpack(y,x) >> (z & (bw-1)).
if (IsCstSplat)
return SDValue();
SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
bool IsCst = ISD::isBuildVectorOfConstantSDNodes(AmtMod.getNode());
// Constant vXi16 funnel shifts can be efficiently handled by default.
if (IsCst && EltSizeInBits == 16)
return SDValue();
unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL;
unsigned NumElts = VT.getVectorNumElements();
MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2);
// Split 256-bit integers on XOP/pre-AVX2 targets.
// Split 512-bit integers on non 512-bit BWI targets.
if ((VT.is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 16) ||
!Subtarget.hasAVX2())) ||
(VT.is512BitVector() && !Subtarget.useBWIRegs() &&
EltSizeInBits < 32)) {
// Pre-mask the amount modulo using the wider vector.
Op = DAG.getNode(Op.getOpcode(), DL, VT, Op0, Op1, AmtMod);
return splitVectorOp(Op, DAG);
}
// Attempt to fold scalar shift as unpack(y,x) << zext(splat(z))
if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) {
int ScalarAmtIdx = -1;
if (SDValue ScalarAmt = DAG.getSplatSourceVector(AmtMod, ScalarAmtIdx)) {
// Uniform vXi16 funnel shifts can be efficiently handled by default.
if (EltSizeInBits == 16)
return SDValue();
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
Lo = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Lo, ScalarAmt,
ScalarAmtIdx, Subtarget, DAG);
Hi = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Hi, ScalarAmt,
ScalarAmtIdx, Subtarget, DAG);
return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);
}
}
MVT WideSVT = MVT::getIntegerVT(
std::min<unsigned>(EltSizeInBits * 2, Subtarget.hasBWI() ? 16 : 32));
MVT WideVT = MVT::getVectorVT(WideSVT, NumElts);
// If per-element shifts are legal, fallback to generic expansion.
if (supportedVectorVarShift(VT, Subtarget, ShiftOpc) || Subtarget.hasXOP())
return SDValue();
// Attempt to fold as:
// fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
// fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
if (supportedVectorVarShift(WideVT, Subtarget, ShiftOpc) &&
supportedVectorShiftWithImm(WideVT, Subtarget, ShiftOpc)) {
Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Op0);
Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op1);
AmtMod = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, AmtMod);
Op0 = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, WideVT, Op0,
EltSizeInBits, DAG);
SDValue Res = DAG.getNode(ISD::OR, DL, WideVT, Op0, Op1);
Res = DAG.getNode(ShiftOpc, DL, WideVT, Res, AmtMod);
if (!IsFSHR)
Res = getTargetVShiftByConstNode(X86ISD::VSRLI, DL, WideVT, Res,
EltSizeInBits, DAG);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
// Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z)
if (((IsCst || !Subtarget.hasAVX512()) && !IsFSHR && EltSizeInBits <= 16) ||
supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
SDValue Z = DAG.getConstant(0, DL, VT);
SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));
SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));
SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo);
SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi);
return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);
}
// Fallback to generic expansion.
return SDValue();
}
assert(
(VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
"Unexpected funnel shift type!");
// Expand slow SHLD/SHRD cases if we are not optimizing for size.
bool OptForSize = DAG.shouldOptForSize();
bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();
// fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
// fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
!isa<ConstantSDNode>(Amt)) {
SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32);
Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32);
Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask);
SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift);
Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1);
if (IsFSHR) {
Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt);
} else {
Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt);
Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift);
}
return DAG.getZExtOrTrunc(Res, DL, VT);
}
if (VT == MVT::i8 || ExpandFunnel)
return SDValue();
// i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
if (VT == MVT::i16) {
Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
DAG.getConstant(15, DL, Amt.getValueType()));
unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
}
return Op;
}
static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.isVector() && "Custom lowering only for vector rotates!");
SDLoc DL(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
int NumElts = VT.getVectorNumElements();
bool IsROTL = Opcode == ISD::ROTL;
// Check for constant splat rotation amount.
APInt CstSplatValue;
bool IsCstSplat = X86::isConstantSplat(Amt, CstSplatValue);
// Check for splat rotate by zero.
if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
return R;
// AVX512 implicitly uses modulo rotation amounts.
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
if (IsCstSplat) {
unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;
uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
return DAG.getNode(RotOpc, DL, VT, R,
DAG.getTargetConstant(RotAmt, DL, MVT::i8));
}
// Else, fall-back on VPROLV/VPRORV.
return Op;
}
// AVX512 VBMI2 vXi16 - lower to funnel shifts.
if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
}
SDValue Z = DAG.getConstant(0, DL, VT);
if (!IsROTL) {
// If the ISD::ROTR amount is constant, we're always better off converting to
// ISD::ROTL.
if (SDValue NegAmt = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {Z, Amt}))
return DAG.getNode(ISD::ROTL, DL, VT, R, NegAmt);
// XOP targets always prefer ISD::ROTL.
if (Subtarget.hasXOP())
return DAG.getNode(ISD::ROTL, DL, VT, R,
DAG.getNode(ISD::SUB, DL, VT, Z, Amt));
}
// Split 256-bit integers on XOP/pre-AVX2 targets.
if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2()))
return splitVectorIntBinary(Op, DAG);
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
// XOP implicitly uses modulo rotation amounts.
if (Subtarget.hasXOP()) {
assert(IsROTL && "Only ROTL expected");
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
if (IsCstSplat) {
uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
DAG.getTargetConstant(RotAmt, DL, MVT::i8));
}
// Use general rotate by variable (per-element).
return Op;
}
// Rotate by a uniform constant - expand back to shifts.
if (IsCstSplat)
return SDValue();
// Split 512-bit integers on non 512-bit BWI targets.
if (VT.is512BitVector() && !Subtarget.useBWIRegs())
return splitVectorIntBinary(Op, DAG);
assert(
(VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
Subtarget.hasAVX2()) ||
((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.useBWIRegs())) &&
"Only vXi32/vXi16/vXi8 vector rotates supported");
MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2);
SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
// Attempt to fold as unpack(x,x) << zext(splat(y)):
// rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
// rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) {
int BaseRotAmtIdx = -1;
if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) {
if (EltSizeInBits == 16 && Subtarget.hasSSE41()) {
unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
}
unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
Lo = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Lo, BaseRotAmt,
BaseRotAmtIdx, Subtarget, DAG);
Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, BaseRotAmt,
BaseRotAmtIdx, Subtarget, DAG);
return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);
}
}
// v16i8/v32i8/v64i8: Split rotation into rot4/rot2/rot1 stages and select by
// the amount bit.
// TODO: We're doing nothing here that we couldn't do for funnel shifts.
if (EltSizeInBits == 8) {
bool IsConstAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
MVT WideVT =
MVT::getVectorVT(Subtarget.hasBWI() ? MVT::i16 : MVT::i32, NumElts);
unsigned ShiftOpc = IsROTL ? ISD::SHL : ISD::SRL;
// Attempt to fold as:
// rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw.
// rotr(x,y) -> (((aext(x) << bw) | zext(x)) >> (y & (bw-1))).
if (supportedVectorVarShift(WideVT, Subtarget, ShiftOpc) &&
supportedVectorShiftWithImm(WideVT, Subtarget, ShiftOpc)) {
// If we're rotating by constant, just use default promotion.
if (IsConstAmt)
return SDValue();
// See if we can perform this by widening to vXi16 or vXi32.
R = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, R);
R = DAG.getNode(
ISD::OR, DL, WideVT, R,
getTargetVShiftByConstNode(X86ISD::VSHLI, DL, WideVT, R, 8, DAG));
Amt = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, AmtMod);
R = DAG.getNode(ShiftOpc, DL, WideVT, R, Amt);
if (IsROTL)
R = getTargetVShiftByConstNode(X86ISD::VSRLI, DL, WideVT, R, 8, DAG);
return DAG.getNode(ISD::TRUNCATE, DL, VT, R);
}
// Attempt to fold as unpack(x,x) << zext(y):
// rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
// rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
if (IsConstAmt || supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
// See if we can perform this by unpacking to lo/hi vXi16.
SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));
SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));
SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo);
SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi);
return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);
}
assert((VT == MVT::v16i8 || VT == MVT::v32i8) && "Unsupported vXi8 type");
// We don't need ModuloAmt here as we just peek at individual bits.
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
// On SSE41 targets we can use PBLENDVB which selects bytes based just
// on the sign bit.
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
return DAG.getBitcast(SelVT,
DAG.getNode(X86ISD::BLENDV, DL, VT, Sel, V0, V1));
}
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue Z = DAG.getConstant(0, DL, SelVT);
SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
return DAG.getSelect(DL, SelVT, C, V0, V1);
};
// ISD::ROTR is currently only profitable on AVX512 targets with VPTERNLOG.
if (!IsROTL && !useVPTERNLOG(Subtarget, VT)) {
Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt);
IsROTL = true;
}
unsigned ShiftLHS = IsROTL ? ISD::SHL : ISD::SRL;
unsigned ShiftRHS = IsROTL ? ISD::SRL : ISD::SHL;
// Turn 'a' into a mask suitable for VSELECT: a = a << 5;
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
Amt = DAG.getBitcast(ExtVT, Amt);
Amt = DAG.getNode(ISD::SHL, DL, ExtVT, Amt, DAG.getConstant(5, DL, ExtVT));
Amt = DAG.getBitcast(VT, Amt);
// r = VSELECT(r, rot(r, 4), a);
SDValue M;
M = DAG.getNode(
ISD::OR, DL, VT,
DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(4, DL, VT)),
DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(4, DL, VT)));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);
// r = VSELECT(r, rot(r, 2), a);
M = DAG.getNode(
ISD::OR, DL, VT,
DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(2, DL, VT)),
DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(6, DL, VT)));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);
// return VSELECT(r, rot(r, 1), a);
M = DAG.getNode(
ISD::OR, DL, VT,
DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(1, DL, VT)),
DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(7, DL, VT)));
return SignBitSelect(VT, Amt, M, R);
}
bool IsSplatAmt = DAG.isSplatValue(Amt);
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
supportedVectorVarShift(VT, Subtarget, ISD::SRL);
// Fallback for splats + all supported variable shifts.
// Fallback for non-constant AVX2 vXi16 as well.
if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
SDValue SHL = DAG.getNode(IsROTL ? ISD::SHL : ISD::SRL, DL, VT, R, Amt);
SDValue SRL = DAG.getNode(IsROTL ? ISD::SRL : ISD::SHL, DL, VT, R, AmtR);
return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
}
// Everything below assumes ISD::ROTL.
if (!IsROTL) {
Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt);
IsROTL = true;
}
// ISD::ROT* uses modulo rotate amounts.
Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
assert(IsROTL && "Only ROTL supported");
// As with shifts, attempt to convert the rotation amount to a multiplication
// factor, fallback to general expansion.
SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG);
if (!Scale)
return SDValue();
// v8i16/v16i16: perform unsigned multiply hi/lo and OR the results.
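// For i16: rotl(X, C) == (X * (1 << C)) | mulhu(X, 1 << C) - the low half of
// the product holds the left-shifted bits and the high half the bits that
// wrap around.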
if (EltSizeInBits == 16) {
SDValue Lo = DAG.getNode(ISD::MUL, DL, VT, R, Scale);
SDValue Hi = DAG.getNode(ISD::MULHU, DL, VT, R, Scale);
return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
// v4i32: make use of the PMULUDQ instruction to multiply 2 lanes of v4i32
// to v2i64 results at a time. The upper 32 bits contain the wrapped bits
// that can then be OR'd with the lower 32 bits.
assert(VT == MVT::v4i32 && "Only v4i32 vector rotate expected");
static const int OddMask[] = {1, -1, 3, -1};
SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask);
SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask);
SDValue Res02 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
DAG.getBitcast(MVT::v2i64, R),
DAG.getBitcast(MVT::v2i64, Scale));
SDValue Res13 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
DAG.getBitcast(MVT::v2i64, R13),
DAG.getBitcast(MVT::v2i64, Scale13));
Res02 = DAG.getBitcast(VT, Res02);
Res13 = DAG.getBitcast(VT, Res13);
return DAG.getNode(ISD::OR, DL, VT,
DAG.getVectorShuffle(VT, DL, Res02, Res13, {0, 4, 2, 6}),
DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7}));
}
/// Returns true if the operand type is exactly twice the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
return Subtarget.canUseCMPXCHG8B() && !Subtarget.is64Bit();
if (OpWidth == 128)
return Subtarget.canUseCMPXCHG16B();
return false;
}
TargetLoweringBase::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
Type *MemType = SI->getValueOperand()->getType();
bool NoImplicitFloatOps =
SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
(Subtarget.hasSSE1() || Subtarget.hasX87()))
return AtomicExpansionKind::None;
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand
: AtomicExpansionKind::None;
}
// Note: this turns large loads into lock cmpxchg8b/16b.
// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
Type *MemType = LI->getType();
// If this is a 64-bit atomic load on a 32-bit target and SSE2 is enabled, we
// can use movq to do the load. If we have X87 we can load into an 80-bit
// X87 register and store it to a stack temporary.
bool NoImplicitFloatOps =
LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
(Subtarget.hasSSE1() || Subtarget.hasX87()))
return AtomicExpansionKind::None;
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
}
enum BitTestKind : unsigned {
UndefBit,
ConstantBit,
NotConstantBit,
ShiftBit,
NotShiftBit
};
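// Determine whether V changes exactly one known bit: for a constant, tag it
// ConstantBit/NotConstantBit (a power of 2 or its complement); for a (possibly
// NOT'd) '1 << X' pattern, return the bit position X tagged
// ShiftBit/NotShiftBit; anything else is reported as UndefBit.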
static std::pair<Value *, BitTestKind> FindSingleBitChange(Value *V) {
using namespace llvm::PatternMatch;
BitTestKind BTK = UndefBit;
auto *C = dyn_cast<ConstantInt>(V);
if (C) {
// Check if V is a power of 2 or NOT power of 2.
if (isPowerOf2_64(C->getZExtValue()))
BTK = ConstantBit;
else if (isPowerOf2_64((~C->getValue()).getZExtValue()))
BTK = NotConstantBit;
return {V, BTK};
}
// Check if V is some power of 2 pattern known to be non-zero
auto *I = dyn_cast<Instruction>(V);
if (I) {
bool Not = false;
// Check if we have a NOT
Value *PeekI;
if (match(I, m_c_Xor(m_Value(PeekI), m_AllOnes())) ||
match(I, m_Sub(m_AllOnes(), m_Value(PeekI)))) {
Not = true;
I = dyn_cast<Instruction>(PeekI);
// If I is constant, it will fold and we can evaluate later. If it's an
// argument or something of that nature, we can't analyze.
if (I == nullptr)
return {nullptr, UndefBit};
}
// We can only use 1 << X without more sophisticated analysis. C << X where
// C is a power of 2 but not 1 can result in zero which cannot be translated
// to bittest. Likewise any C >> X (either arith or logical) can be zero.
if (I->getOpcode() == Instruction::Shl) {
// Todo(1): The cmpxchg case is pretty costly so matching `BLSI(X)`, `X &
// -X` and some other provable power of 2 patterns that we can use CTZ on
// may be profitable.
// Todo(2): It may be possible in some cases to prove that Shl(C, X) is
// non-zero even where C != 1. Likewise LShr(C, X) and AShr(C, X) may also
// be provably a non-zero power of 2.
// Todo(3): ROTL and ROTR patterns on a power of 2 C should also be
// transformable to bittest.
auto *ShiftVal = dyn_cast<ConstantInt>(I->getOperand(0));
if (!ShiftVal)
return {nullptr, UndefBit};
if (ShiftVal->equalsInt(1))
BTK = Not ? NotShiftBit : ShiftBit;
if (BTK == UndefBit)
return {nullptr, UndefBit};
Value *BitV = I->getOperand(1);
Value *AndOp;
const APInt *AndC;
if (match(BitV, m_c_And(m_Value(AndOp), m_APInt(AndC)))) {
// Read past a shiftmask instruction to find count
if (*AndC == (I->getType()->getPrimitiveSizeInBits() - 1))
BitV = AndOp;
}
return {BitV, BTK};
}
}
return {nullptr, UndefBit};
}
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
if (AI->use_empty())
return AtomicExpansionKind::None;
// If the atomicrmw's result is used by a single bit AND, we may use
// bts/btr/btc instruction for these operations.
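// e.g. '(atomicrmw or %p, %mask) & %mask' where %mask has a single bit set can
// be lowered to a 'lock bts' that directly returns the original value of that
// bit.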
// Note: InstCombinePass can cause a de-optimization here. It replaces the
// SETCC(And(AtomicRMW(P, power_of_2), power_of_2)) with LShr and Xor
// (depending on CC). This pattern can only use bts/btr/btc but we don't
// detect it.
Instruction *I = AI->user_back();
auto BitChange = FindSingleBitChange(AI->getValOperand());
if (BitChange.second == UndefBit || !AI->hasOneUse() ||
I->getOpcode() != Instruction::And ||
AI->getType()->getPrimitiveSizeInBits() == 8 ||
AI->getParent() != I->getParent())
return AtomicExpansionKind::CmpXChg;
unsigned OtherIdx = I->getOperand(0) == AI ? 1 : 0;
// This is a redundant AND, it should get cleaned up elsewhere.
if (AI == I->getOperand(OtherIdx))
return AtomicExpansionKind::CmpXChg;
// The following instruction must be an AND with a single bit.
if (BitChange.second == ConstantBit || BitChange.second == NotConstantBit) {
auto *C1 = cast<ConstantInt>(AI->getValOperand());
auto *C2 = dyn_cast<ConstantInt>(I->getOperand(OtherIdx));
if (!C2 || !isPowerOf2_64(C2->getZExtValue())) {
return AtomicExpansionKind::CmpXChg;
}
if (AI->getOperation() == AtomicRMWInst::And) {
return ~C1->getValue() == C2->getValue()
? AtomicExpansionKind::BitTestIntrinsic
: AtomicExpansionKind::CmpXChg;
}
return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
: AtomicExpansionKind::CmpXChg;
}
assert(BitChange.second == ShiftBit || BitChange.second == NotShiftBit);
auto BitTested = FindSingleBitChange(I->getOperand(OtherIdx));
if (BitTested.second != ShiftBit && BitTested.second != NotShiftBit)
return AtomicExpansionKind::CmpXChg;
assert(BitChange.first != nullptr && BitTested.first != nullptr);
// If shift amounts are not the same we can't use BitTestIntrinsic.
if (BitChange.first != BitTested.first)
return AtomicExpansionKind::CmpXChg;
// For atomic AND we need to be masking all but one bit and testing the one
// bit that is unset in the mask.
if (AI->getOperation() == AtomicRMWInst::And)
return (BitChange.second == NotShiftBit && BitTested.second == ShiftBit)
? AtomicExpansionKind::BitTestIntrinsic
: AtomicExpansionKind::CmpXChg;
// For atomic XOR/OR we need to be setting and testing the same bit.
return (BitChange.second == ShiftBit && BitTested.second == ShiftBit)
? AtomicExpansionKind::BitTestIntrinsic
: AtomicExpansionKind::CmpXChg;
}
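// Lower the atomicrmw and its single-bit AND user to one of the x86 bit-test
// intrinsics (bts/btr/btc), using an immediate bit index for constant masks or
// a masked variable bit position otherwise; the variable result is shifted
// back into place unless it is only compared against zero.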
void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
IRBuilder<> Builder(AI);
Intrinsic::ID IID_C = Intrinsic::not_intrinsic;
Intrinsic::ID IID_I = Intrinsic::not_intrinsic;
switch (AI->getOperation()) {
default:
llvm_unreachable("Unknown atomic operation");
case AtomicRMWInst::Or:
IID_C = Intrinsic::x86_atomic_bts;
IID_I = Intrinsic::x86_atomic_bts_rm;
break;
case AtomicRMWInst::Xor:
IID_C = Intrinsic::x86_atomic_btc;
IID_I = Intrinsic::x86_atomic_btc_rm;
break;
case AtomicRMWInst::And:
IID_C = Intrinsic::x86_atomic_btr;
IID_I = Intrinsic::x86_atomic_btr_rm;
break;
}
Instruction *I = AI->user_back();
LLVMContext &Ctx = AI->getContext();
Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
Type::getInt8PtrTy(Ctx));
Function *BitTest = nullptr;
Value *Result = nullptr;
auto BitTested = FindSingleBitChange(AI->getValOperand());
assert(BitTested.first != nullptr);
if (BitTested.second == ConstantBit || BitTested.second == NotConstantBit) {
auto *C = cast<ConstantInt>(I->getOperand(I->getOperand(0) == AI ? 1 : 0));
BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_C, AI->getType());
unsigned Imm = countTrailingZeros(C->getZExtValue());
Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
} else {
BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_I, AI->getType());
assert(BitTested.second == ShiftBit || BitTested.second == NotShiftBit);
Value *SI = BitTested.first;
assert(SI != nullptr);
// BT{S|R|C} on a memory operand doesn't modulo the bit position, so we need
// to mask it.
unsigned ShiftBits = SI->getType()->getPrimitiveSizeInBits();
Value *BitPos =
Builder.CreateAnd(SI, Builder.getIntN(ShiftBits, ShiftBits - 1));
// Todo(1): In many cases it may be provable that SI is less than
// ShiftBits in which case this mask is unnecessary
// Todo(2): In the fairly idiomatic case of P[X / sizeof_bits(X)] OP 1
// << (X % sizeof_bits(X)) we can drop the shift mask and AGEN in
// favor of just a raw BT{S|R|C}.
Result = Builder.CreateCall(BitTest, {Addr, BitPos});
Result = Builder.CreateZExtOrTrunc(Result, AI->getType());
// If the result is only used for a zero/non-zero status then we don't need
// to shift the value back. Otherwise do so.
for (auto It = I->user_begin(); It != I->user_end(); ++It) {
if (auto *ICmp = dyn_cast<ICmpInst>(*It)) {
if (ICmp->isEquality()) {
auto *C0 = dyn_cast<ConstantInt>(ICmp->getOperand(0));
auto *C1 = dyn_cast<ConstantInt>(ICmp->getOperand(1));
if (C0 || C1) {
assert(C0 == nullptr || C1 == nullptr);
if ((C0 ? C0 : C1)->isZero())
continue;
}
}
}
Result = Builder.CreateShl(Result, BitPos);
break;
}
}
I->replaceAllUsesWith(Result);
I->eraseFromParent();
AI->eraseFromParent();
}
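// Check whether the atomicrmw result feeds (directly or through one matching
// arithmetic/logic instruction) a comparison whose outcome can be read from
// the flags of the corresponding lock-prefixed instruction, e.g.
// 'icmp eq (atomicrmw sub %p, %v), %v' is just the ZF of 'lock sub'.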
static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI) {
using namespace llvm::PatternMatch;
if (!AI->hasOneUse())
return false;
Value *Op = AI->getOperand(1);
ICmpInst::Predicate Pred;
Instruction *I = AI->user_back();
AtomicRMWInst::BinOp Opc = AI->getOperation();
if (Opc == AtomicRMWInst::Add) {
if (match(I, m_c_ICmp(Pred, m_Sub(m_ZeroInt(), m_Specific(Op)), m_Value())))
return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value())))) {
if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
return Pred == CmpInst::ICMP_SLT;
if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))
return Pred == CmpInst::ICMP_SGT;
}
return false;
}
if (Opc == AtomicRMWInst::Sub) {
if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op))))) {
if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
return Pred == CmpInst::ICMP_SLT;
if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))
return Pred == CmpInst::ICMP_SGT;
}
return false;
}
if ((Opc == AtomicRMWInst::Or &&
match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value())))) ||
(Opc == AtomicRMWInst::And &&
match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))))) {
if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE ||
Pred == CmpInst::ICMP_SLT;
if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))
return Pred == CmpInst::ICMP_SGT;
return false;
}
if (Opc == AtomicRMWInst::Xor) {
if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value())))) {
if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
return Pred == CmpInst::ICMP_SLT;
if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))
return Pred == CmpInst::ICMP_SGT;
}
return false;
}
return false;
}
void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
AtomicRMWInst *AI) const {
IRBuilder<> Builder(AI);
Instruction *TempI = nullptr;
LLVMContext &Ctx = AI->getContext();
ICmpInst *ICI = dyn_cast<ICmpInst>(AI->user_back());
if (!ICI) {
TempI = AI->user_back();
assert(TempI->hasOneUse() && "Must have one use");
ICI = cast<ICmpInst>(TempI->user_back());
}
X86::CondCode CC = X86::COND_INVALID;
ICmpInst::Predicate Pred = ICI->getPredicate();
switch (Pred) {
default:
llvm_unreachable("Not supported Pred");
case CmpInst::ICMP_EQ:
CC = X86::COND_E;
break;
case CmpInst::ICMP_NE:
CC = X86::COND_NE;
break;
case CmpInst::ICMP_SLT:
CC = X86::COND_S;
break;
case CmpInst::ICMP_SGT:
CC = X86::COND_NS;
break;
}
Intrinsic::ID IID = Intrinsic::not_intrinsic;
switch (AI->getOperation()) {
default:
llvm_unreachable("Unknown atomic operation");
case AtomicRMWInst::Add:
IID = Intrinsic::x86_atomic_add_cc;
break;
case AtomicRMWInst::Sub:
IID = Intrinsic::x86_atomic_sub_cc;
break;
case AtomicRMWInst::Or:
IID = Intrinsic::x86_atomic_or_cc;
break;
case AtomicRMWInst::And:
IID = Intrinsic::x86_atomic_and_cc;
break;
case AtomicRMWInst::Xor:
IID = Intrinsic::x86_atomic_xor_cc;
break;
}
Function *CmpArith =
Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
Type::getInt8PtrTy(Ctx));
Value *Call = Builder.CreateCall(
CmpArith, {Addr, AI->getValOperand(), Builder.getInt32((unsigned)CC)});
Value *Result = Builder.CreateTrunc(Call, Type::getInt1Ty(Ctx));
ICI->replaceAllUsesWith(Result);
ICI->eraseFromParent();
if (TempI)
TempI->eraseFromParent();
AI->eraseFromParent();
}
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.
if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
}
AtomicRMWInst::BinOp Op = AI->getOperation();
switch (Op) {
case AtomicRMWInst::Xchg:
return AtomicExpansionKind::None;
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
if (shouldExpandCmpArithRMWInIR(AI))
return AtomicExpansionKind::CmpArithIntrinsic;
// It's better to use xadd, xsub or xchg for these in other cases.
return AtomicExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
if (shouldExpandCmpArithRMWInIR(AI))
return AtomicExpansionKind::CmpArithIntrinsic;
return shouldExpandLogicAtomicRMWInIR(AI);
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
case AtomicRMWInst::FMax:
case AtomicRMWInst::FMin:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
default:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg;
}
}
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
// Accesses larger than the native width are turned into cmpxchg/libcalls, so
// there is no benefit in turning such RMWs into loads, and it is actually
// harmful as it introduces an mfence.
if (MemType->getPrimitiveSizeInBits() > NativeWidth)
return nullptr;
// If this is a canonical idempotent atomicrmw w/no uses, we have a better
// lowering available in lowerAtomicArith.
// TODO: push more cases through this path.
if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
AI->use_empty())
return nullptr;
IRBuilder<> Builder(AI);
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
// ReleaseAcquire orderings.
auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
// Before the load we need a fence. Here is an example lifted from
// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
// is required:
// Thread 0:
// x.store(1, relaxed);
// r1 = y.fetch_add(0, release);
// Thread 1:
// y.fetch_add(42, acquire);
// r2 = x.load(relaxed);
// r1 = r2 = 0 is impossible, but becomes possible if the idempotent rmw is
// lowered to just a load without a fence. An mfence flushes the store buffer,
// making the optimization clearly correct.
// FIXME: it is required if isReleaseOrStronger(Order) but it is not clear
// otherwise; we might be able to be more aggressive on relaxed idempotent
// rmw. In practice, they do not look useful, so we don't try to be
// especially clever.
if (SSID == SyncScope::SingleThread)
// FIXME: we could just insert an ISD::MEMBARRIER here, except we are at
// the IR level, so we must wrap it in an intrinsic.
return nullptr;
if (!Subtarget.hasMFence())
// FIXME: it might make sense to use a locked operation here but on a
// different cache-line to prevent cache-line bouncing. In practice it
// is probably a small win, and x86 processors without mfence are rare
// enough that we do not bother.
return nullptr;
Function *MFence =
llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence);
Builder.CreateCall(MFence, {});
// Finally we can emit the atomic load.
LoadInst *Loaded = Builder.CreateAlignedLoad(
AI->getType(), AI->getPointerOperand(), AI->getAlign());
Loaded->setAtomic(Order, SSID);
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
return Loaded;
}
bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
if (!SI.isUnordered())
return false;
return ExperimentalUnorderedISEL;
}
bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
if (!LI.isUnordered())
return false;
return ExperimentalUnorderedISEL;
}
/// Emit a locked operation on a stack location which does not change any
/// memory location, but does involve a lock prefix. Location is chosen to be
/// a) very likely accessed only by a single thread to minimize cache traffic,
/// and b) definitely dereferenceable. Returns the new Chain result.
static SDValue emitLockedStackOp(SelectionDAG &DAG,
const X86Subtarget &Subtarget, SDValue Chain,
const SDLoc &DL) {
// Implementation notes:
// 1) LOCK prefix creates a full read/write reordering barrier for memory
// operations issued by the current processor. As such, the location
// referenced is not relevant for the ordering properties of the instruction.
// See: Intel® 64 and IA-32 Architectures Software Developer’s Manual,
// 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
// 2) Using an immediate operand appears to be the best encoding choice
// here since it doesn't require an extra register.
// 3) OR appears to be very slightly faster than ADD. (Though, the difference
// is small enough it might just be measurement noise.)
// 4) When choosing offsets, there are several contributing factors:
// a) If there's no redzone, we default to TOS. (We could allocate a cache
// line aligned stack object to improve this case.)
// b) To minimize our chances of introducing a false dependence, we prefer
// to offset the stack usage from TOS slightly.
// c) To minimize concerns about cross thread stack usage - in particular,
// the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
// captures state in the TOS frame and accesses it from many threads -
// we want to use an offset such that the offset is in a distinct cache
// line from the TOS frame.
//
// For a general discussion of the tradeoffs and benchmark results, see:
// https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
auto &MF = DAG.getMachineFunction();
auto &TFL = *Subtarget.getFrameLowering();
const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0;
if (Subtarget.is64Bit()) {
SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::RSP, MVT::i64), // Base
DAG.getTargetConstant(1, DL, MVT::i8), // Scale
DAG.getRegister(0, MVT::i64), // Index
DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
DAG.getRegister(0, MVT::i16), // Segment.
Zero,
Chain};
SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
MVT::Other, Ops);
return SDValue(Res, 1);
}
SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, DL, MVT::i8), // Scale
DAG.getRegister(0, MVT::i32), // Index
DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
DAG.getRegister(0, MVT::i16), // Segment.
Zero,
Chain
};
SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
MVT::Other, Ops);
return SDValue(Res, 1);
}
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
AtomicOrdering FenceOrdering =
static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
SyncScope::ID FenceSSID =
static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
FenceSSID == SyncScope::System) {
if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
return emitLockedStackOp(DAG, Subtarget, Chain, dl);
}
// MEMBARRIER is a compiler barrier; it codegens to a no-op.
return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT T = Op.getSimpleValueType();
SDLoc DL(Op);
unsigned Reg = 0;
unsigned size = 0;
switch(T.SimpleTy) {
default: llvm_unreachable("Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
case MVT::i64:
assert(Subtarget.is64Bit() && "Node not type legal!");
Reg = X86::RAX; size = 8;
break;
}
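// Copy the comparand into the accumulator register expected by CMPXCHG, emit
// the locked compare-and-exchange, then read back the old value and EFLAGS to
// produce the success flag.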
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
DAG.getTargetConstant(size, DL, MVT::i8),
cpIn.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
Ops, T, MMO);
SDValue cpOut =
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
MVT::i32, cpOut.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
cpOut, Success, EFLAGS.getValue(1));
}
// Create MOVMSKB, taking into account whether we need to split for AVX1.
static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT InVT = V.getSimpleValueType();
if (InVT == MVT::v64i8) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
Lo = getPMOVMSKB(DL, Lo, DAG, Subtarget);
Hi = getPMOVMSKB(DL, Hi, DAG, Subtarget);
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
DAG.getConstant(32, DL, MVT::i8));
return DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
}
if (InVT == MVT::v32i8 && !Subtarget.hasInt256()) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi,
DAG.getConstant(16, DL, MVT::i8));
return DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi);
}
return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
// Legalize (v64i1 (bitcast i64 (X))) by splitting the i64, bitcasting each
// half to v32i1 and concatenating the result.
if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) {
assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
assert(Subtarget.hasBWI() && "Expected BWI target");
SDLoc dl(Op);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
DAG.getIntPtrConstant(0, dl));
Lo = DAG.getBitcast(MVT::v32i1, Lo);
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
DAG.getIntPtrConstant(1, dl));
Hi = DAG.getBitcast(MVT::v32i1, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
}
// Use MOVMSK for vector to scalar conversion to prevent scalarization.
if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.isScalarInteger()) {
assert(!Subtarget.hasAVX512() && "Should use K-registers with AVX512");
MVT SExtVT = SrcVT == MVT::v16i1 ? MVT::v16i8 : MVT::v32i8;
SDLoc DL(Op);
SDValue V = DAG.getSExtOrTrunc(Src, DL, SExtVT);
V = getPMOVMSKB(DL, V, DAG, Subtarget);
return DAG.getZExtOrTrunc(V, DL, DstVT);
}
assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) && "Unexpected VT!");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) &&
!(DstVT == MVT::x86mmx && SrcVT.isVector()))
// This conversion needs to be expanded.
return SDValue();
SDLoc dl(Op);
if (SrcVT.isVector()) {
// Widen the input vector in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
SrcVT.getVectorNumElements() * 2);
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
DAG.getUNDEF(SrcVT));
} else {
assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
}
MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);
if (DstVT == MVT::x86mmx)
return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
DAG.getIntPtrConstant(0, dl));
}
/// Compute the horizontal sum of bytes in V for the elements of VT.
///
/// Requires V to be a byte vector and VT to be an integer vector type with
/// wider elements than V's type. The width of the elements of VT determines
/// how many bytes of V are summed horizontally to produce each element of the
/// result.
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(V);
MVT ByteVecVT = V.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
assert(ByteVecVT.getVectorElementType() == MVT::i8 &&
"Expected value to have byte element type.");
assert(EltVT != MVT::i8 &&
"Horizontal byte sum only makes sense for wider elements!");
unsigned VecSize = VT.getSizeInBits();
assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");
// The PSADBW instruction horizontally adds all bytes and leaves the result in
// i64 chunks, thus directly computing the pop count for v2i64 and v4i64.
if (EltVT == MVT::i64) {
SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
return DAG.getBitcast(VT, V);
}
if (EltVT == MVT::i32) {
// We unpack the low half and high half into i32s interleaved with zeros so
// that we can use PSADBW to horizontally sum them. The most useful part of
// this is that it lines up the results of two PSADBW instructions to be
// two v2i64 vectors which concatenated are the 4 population counts. We can
// then use PACKUSWB to shrink and concatenate them into a v4i32 again.
SDValue Zeros = DAG.getConstant(0, DL, VT);
SDValue V32 = DAG.getBitcast(VT, V);
SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);
// Do the horizontal sums into two v2i64s.
Zeros = DAG.getConstant(0, DL, ByteVecVT);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, Low), Zeros);
High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, High), Zeros);
// Merge them together.
MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16);
V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT,
DAG.getBitcast(ShortVecVT, Low),
DAG.getBitcast(ShortVecVT, High));
return DAG.getBitcast(VT, V);
}
// The only element type left is i16.
assert(EltVT == MVT::i16 && "Unknown how to handle type");
// To obtain the pop count for each i16 element starting from the pop count
// for i8 elements, shift the i16s left by 8, sum as i8s, and then shift as
// i16s right by 8. It is important to shift as i16s because an i8 vector
// shift isn't directly supported.
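// For example, an i16 element holding the byte counts [a, b] (b in the low
// byte) becomes [b, 0] after the shift, [a+b, b] after the i8 add, and
// [0, a+b] after the final i16 shift right, i.e. the i16 pop count.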
SDValue ShifterV = DAG.getConstant(8, DL, VT);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl),
DAG.getBitcast(ByteVecVT, V));
return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
}
static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
int NumElts = VT.getVectorNumElements();
(void)EltVT;
assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported.");
// Implement a lookup table in register by using an algorithm based on:
// http://wm.ite.pl/articles/sse-popcount.html
//
// The general idea is that every nibble of each byte in the input vector is
// an index into an in-register pre-computed pop count table. We then split
// the input vector into two new ones: (1) a vector with only the
// shifted-right higher nibbles of each byte and (2) a vector with the lower
// nibbles (and the higher ones masked out) of each byte. PSHUFB is used
// separately with both to index the in-register table. Next, both are added
// and the result is an i8 vector where each element contains the pop count
// for its input byte.
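// For example, for input byte 0xB6 (0b10110110): the low nibble 0x6 indexes
// LUT[0x6] = 2 and the high nibble 0xB indexes LUT[0xB] = 3, and 2 + 3 = 5
// is the byte's pop count.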
const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
/* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
/* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};
SmallVector<SDValue, 64> LUTVec;
for (int i = 0; i < NumElts; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec);
SDValue M0F = DAG.getConstant(0x0F, DL, VT);
// High nibbles
SDValue FourV = DAG.getConstant(4, DL, VT);
SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV);
// Low nibbles
SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F);
// The input vector is used as the shuffle mask that indexes elements into
// the LUT. After counting low and high nibbles, add the vectors to obtain
// the final pop count per i8 element.
SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles);
SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles);
return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt);
}
// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
// updated cost models in X86TTIImpl::getIntrinsicInstrCost.
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
"Unknown CTPOP type to handle");
SDLoc DL(Op.getNode());
SDValue Op0 = Op.getOperand(0);
// TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions.
if (Subtarget.hasVPOPCNTDQ()) {
unsigned NumElems = VT.getVectorNumElements();
assert((VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16) && "Unexpected type");
if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) {
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0);
Op = DAG.getNode(ISD::CTPOP, DL, NewVT, Op);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
}
}
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntUnary(Op, DAG);
// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
return splitVectorIntUnary(Op, DAG);
// For element types greater than i8, do vXi8 pop counts and a bytesum.
if (VT.getScalarType() != MVT::i8) {
MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
SDValue ByteOp = DAG.getBitcast(ByteVT, Op0);
SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp);
return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG);
}
// We can't use the fast LUT approach, so fall back on LegalizeDAG.
if (!Subtarget.hasSSSE3())
return SDValue();
return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().isVector() &&
"We only do custom lowering for vector population count.");
return LowerVectorCTPOP(Op, Subtarget, DAG);
}
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
// For scalars, it's still beneficial to transfer to/from the SIMD unit to
// perform the BITREVERSE.
if (!VT.isVector()) {
MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
Res = DAG.getNode(ISD::BITREVERSE, DL, VecVT, Res);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
int NumElts = VT.getVectorNumElements();
int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector())
return splitVectorIntUnary(Op, DAG);
assert(VT.is128BitVector() &&
"Only 128-bit vector bitreverse lowering supported.");
// VPPERM reverses the bits of a byte with the permute Op (2 << 5), and we
// perform the BSWAP in the shuffle.
// It's best to shuffle using the second operand, as this will implicitly
// allow memory folding for multiple vectors.
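// Each mask byte selects input byte 16+idx (i.e. from the second source, in
// byte-swapped order via the descending inner loop) and ORs in 0x40 =
// (2 << 5) to ask VPPERM to bit-reverse the selected byte.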
SmallVector<SDValue, 16> MaskElts;
for (int i = 0; i != NumElts; ++i) {
for (int j = ScalarSizeInBytes - 1; j >= 0; --j) {
int SourceByte = 16 + (i * ScalarSizeInBytes) + j;
int PermuteByte = SourceByte | (2 << 5);
MaskElts.push_back(DAG.getConstant(PermuteByte, DL, MVT::i8));
}
}
SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, MaskElts);
SDValue Res = DAG.getBitcast(MVT::v16i8, In);
Res = DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, DAG.getUNDEF(MVT::v16i8),
Res, Mask);
return DAG.getBitcast(VT, Res);
}
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (Subtarget.hasXOP() && !VT.is512BitVector())
return LowerBITREVERSE_XOP(Op, DAG);
assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
assert(VT.getScalarType() == MVT::i8 &&
"Only byte vector BITREVERSE supported");
// Split v64i8 without BWI so that we can still use the PSHUFB lowering.
if (VT == MVT::v64i8 && !Subtarget.hasBWI())
return splitVectorIntUnary(Op, DAG);
// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
if (VT == MVT::v32i8 && !Subtarget.hasInt256())
return splitVectorIntUnary(Op, DAG);
unsigned NumElts = VT.getVectorNumElements();
// If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits.
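// The constant 0x8040201008040201 encodes the 8x8 bit matrix that maps bit
// i of each byte to bit 7-i, so a single GF2P8AFFINEQB with a zero additive
// term performs the per-byte bit reversal.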
if (Subtarget.hasGFNI()) {
MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT);
Matrix = DAG.getBitcast(VT, Matrix);
return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix,
DAG.getTargetConstant(0, DL, MVT::i8));
}
// Perform BITREVERSE using PSHUFB lookups. Each byte is split into
// two nibbles and a PSHUFB lookup to find the bitreverse of each
// 0-15 value (moved to the other nibble).
SDValue NibbleMask = DAG.getConstant(0xF, DL, VT);
SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask);
SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT));
const int LoLUT[16] = {
/* 0 */ 0x00, /* 1 */ 0x80, /* 2 */ 0x40, /* 3 */ 0xC0,
/* 4 */ 0x20, /* 5 */ 0xA0, /* 6 */ 0x60, /* 7 */ 0xE0,
/* 8 */ 0x10, /* 9 */ 0x90, /* a */ 0x50, /* b */ 0xD0,
/* c */ 0x30, /* d */ 0xB0, /* e */ 0x70, /* f */ 0xF0};
const int HiLUT[16] = {
/* 0 */ 0x00, /* 1 */ 0x08, /* 2 */ 0x04, /* 3 */ 0x0C,
/* 4 */ 0x02, /* 5 */ 0x0A, /* 6 */ 0x06, /* 7 */ 0x0E,
/* 8 */ 0x01, /* 9 */ 0x09, /* a */ 0x05, /* b */ 0x0D,
/* c */ 0x03, /* d */ 0x0B, /* e */ 0x07, /* f */ 0x0F};
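// For example, for input byte 0xB6 (0b10110110): LoLUT[0x6] = 0x60 and
// HiLUT[0xB] = 0x0D, so ORing gives 0x6D (0b01101101), the bit-reversed
// byte.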
SmallVector<SDValue, 16> LoMaskElts, HiMaskElts;
for (unsigned i = 0; i < NumElts; ++i) {
LoMaskElts.push_back(DAG.getConstant(LoLUT[i % 16], DL, MVT::i8));
HiMaskElts.push_back(DAG.getConstant(HiLUT[i % 16], DL, MVT::i8));
}
SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts);
SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts);
Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo);
Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi);
return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue X = Op.getOperand(0);
MVT VT = Op.getSimpleValueType();
// Special case. If the input fits in 8-bits we can use a single 8-bit TEST.
if (VT == MVT::i8 ||
DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) {
X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X,
DAG.getConstant(0, DL, MVT::i8));
// Copy the inverse of the parity flag into a register with setcc.
SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
// Extend to the original type.
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
}
// If we have POPCNT, use the default expansion.
if (Subtarget.hasPOPCNT())
return SDValue();
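// Without POPCNT, compute parity by xor-folding: parity(x) equals the
// parity of (lo half ^ hi half), so fold repeatedly until 8 bits remain and
// then read the CPU parity flag, which is computed on the low byte.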
if (VT == MVT::i64) {
// Xor the high and low 32-bits together using a 32-bit operation.
SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
DAG.getNode(ISD::SRL, DL, MVT::i64, X,
DAG.getConstant(32, DL, MVT::i8)));
SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi);
}
if (VT != MVT::i16) {
// Xor the high and low 16-bits together using a 32-bit operation.
SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X,
DAG.getConstant(16, DL, MVT::i8));
X = DAG.getNode(ISD::XOR, DL, MVT::i32, X, Hi16);
} else {
// If the input is 16-bits, we need to extend to use an i32 shift below.
X = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, X);
}
// Finally xor the low 2 bytes together and use an 8-bit flag-setting xor.
// This should allow an h-reg to be used to save a shift.
SDValue Hi = DAG.getNode(
ISD::TRUNCATE, DL, MVT::i8,
DAG.getNode(ISD::SRL, DL, MVT::i32, X, DAG.getConstant(8, DL, MVT::i8)));
SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32);
SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);
// Copy the inverse of the parity flag into a register with setcc.
SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
// Extend to the original type.
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
}
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
NewOpc = X86ISD::LADD;
break;
case ISD::ATOMIC_LOAD_SUB:
NewOpc = X86ISD::LSUB;
break;
case ISD::ATOMIC_LOAD_OR:
NewOpc = X86ISD::LOR;
break;
case ISD::ATOMIC_LOAD_XOR:
NewOpc = X86ISD::LXOR;
break;
case ISD::ATOMIC_LOAD_AND:
NewOpc = X86ISD::LAND;
break;
default:
llvm_unreachable("Unknown ATOMIC_LOAD_ opcode");
}
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
/*MemVT=*/N->getSimpleValueType(0), MMO);
}
/// Lower atomic_load_ops into LOCK-prefixed operations.
static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
SDValue Chain = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
unsigned Opc = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);
// We can lower atomic_load_add into LXADD. However, any other atomicrmw op
// can only be lowered when the result is unused. They should have already
// been transformed into a cmpxchg loop in AtomicExpand.
if (N->hasAnyUseOfValue(0)) {
// Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
// select LXADD if LOCK_SUB can't be selected.
if (Opc == ISD::ATOMIC_LOAD_SUB) {
RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
RHS, AN->getMemOperand());
}
assert(Opc == ISD::ATOMIC_LOAD_ADD &&
"Used AtomicRMW ops other than Add should have been expanded!");
return N;
}
// Specialized lowering for the canonical form of an idempotent atomicrmw.
// The core idea here is that since the memory location isn't actually
// changing, all we need is a lowering for the *ordering* impacts of the
// atomicrmw. As such, we can choose a different operation and memory
// location to minimize impact on other code.
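// (The canonical idempotent form is "atomicrmw or ptr, 0", which reads and
// rewrites the location without changing its value.)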
if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS)) {
// On X86, the only ordering that actually requires an instruction is
// seq_cst that isn't SingleThread; everything else just needs to be
// preserved during codegen and then dropped. Note that we expect (but don't
// assume) that orderings other than seq_cst and acq_rel have been
// canonicalized to a store or load.
if (AN->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent &&
AN->getSyncScopeID() == SyncScope::System) {
// Prefer a locked operation against a stack location to minimize cache
// traffic. This assumes that stack locations are very likely to be
// accessed only by the owning thread.
SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
DAG.getUNDEF(VT), NewChain);
}
// MEMBARRIER is a compiler barrier; it codegens to a no-op.
SDValue NewChain = DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Chain);
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
DAG.getUNDEF(VT), NewChain);
}
SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
// RAUW the chain, but don't worry about the result, as it's unused.
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
DAG.getUNDEF(VT), LockOp.getValue(1));
}
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
auto *Node = cast<AtomicSDNode>(Op.getNode());
SDLoc dl(Node);
EVT VT = Node->getMemoryVT();
bool IsSeqCst =
Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent;
bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT);
// If this store is not sequentially consistent and the type is legal
// we can just keep it.
if (!IsSeqCst && IsTypeLegal)
return Op;
if (VT == MVT::i64 && !IsTypeLegal) {
// For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
// is enabled.
bool NoImplicitFloatOps =
DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
SDValue Chain;
if (Subtarget.hasSSE1()) {
SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Node->getOperand(2));
MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
SclToVec = DAG.getBitcast(StVT, SclToVec);
SDVTList Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};
Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
MVT::i64, Node->getMemOperand());
} else if (Subtarget.hasX87()) {
// First load this into an 80-bit X87 register using a stack temporary.
// This will put the whole integer into the significand.
SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
Chain =
DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
MPI, MaybeAlign(), MachineMemOperand::MOStore);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue LdOps[] = {Chain, StackPtr};
SDValue Value = DAG.getMemIntrinsicNode(
X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI,
/*Align*/ std::nullopt, MachineMemOperand::MOLoad);
Chain = Value.getValue(1);
// Now use an FIST to do the atomic store.
SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};
Chain =
DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
StoreOps, MVT::i64, Node->getMemOperand());
}
if (Chain) {
// If this is a sequentially consistent store, also emit an appropriate
// barrier.
if (IsSeqCst)
Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
return Chain;
}
}
}
// Convert seq_cst store -> xchg
// Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
// FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
Node->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2),
Node->getMemOperand());
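// ATOMIC_SWAP produces (old value, chain); an atomic store only needs the
// chain, so return value 1.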
return Swap.getValue(1);
}
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
unsigned Opc = Op.getOpcode();
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
SDLoc DL(N);
// Set the carry flag.
SDValue Carry = Op.getOperand(2);
EVT CarryVT = Carry.getValueType();
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getAllOnesConstant(DL, CarryVT));
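// Adding all-ones to Carry sets CF exactly when Carry is nonzero, which
// materializes the incoming carry in EFLAGS for the ADC/SBB below.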
bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY;
SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
Op.getOperand(0), Op.getOperand(1),
Carry.getValue(1));
bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY;
SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
Sum.getValue(1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
// which returns the values as { float, float } (in XMM0) or
// { double, double } (returned in XMM0 and XMM1).
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
bool isF64 = ArgVT == MVT::f64;
// Only optimize x86_64 for now. i386 is a bit messy. For f32,
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
const char *LibcallName = TLI.getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
: (Type *)FixedVectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
if (isF64)
// Returned in xmm0 and xmm1.
return CallResult.first;
// Returned in bits 0:31 and 32:63 of xmm0.
SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(0, dl));
SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(1, dl));
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
/// Widen a vector input to a vector of NVT. The
/// input vector must have the same element type as NVT.
static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
bool FillWithZeroes = false) {
// Check if InOp already has the right width.
MVT InVT = InOp.getSimpleValueType();
if (InVT == NVT)
return InOp;
if (InOp.isUndef())
return DAG.getUNDEF(NVT);
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
unsigned InNumElts = InVT.getVectorNumElements();
unsigned WidenNumElts = NVT.getVectorNumElements();
assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
"Unexpected request for vector widening");
SDLoc dl(InOp);
if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
InOp.getNumOperands() == 2) {
SDValue N1 = InOp.getOperand(1);
if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
N1.isUndef()) {
InOp = InOp.getOperand(0);
InVT = InOp.getSimpleValueType();
InNumElts = InVT.getVectorNumElements();
}
}
if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0; i < InNumElts; ++i)
Ops.push_back(InOp.getOperand(i));
EVT EltVT = InOp.getOperand(0).getValueType();
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
DAG.getUNDEF(EltVT);
for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
Ops.push_back(FillVal);
return DAG.getBuildVector(NVT, dl, Ops);
}
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) :
DAG.getUNDEF(NVT);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal,
InOp, DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
SDValue Src = N->getValue();
MVT VT = Src.getSimpleValueType();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
SDLoc dl(Op);
SDValue Scale = N->getScale();
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
SDValue Chain = N->getChain();
SDValue BasePtr = N->getBasePtr();
if (VT == MVT::v2f32 || VT == MVT::v2i32) {
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
// If the index is v2i64 and we have VLX we can use xmm for data and index.
if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT));
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
N->getMemoryVT(), N->getMemOperand());
}
return SDValue();
}
MVT IndexVT = Index.getSimpleValueType();
// If the index is v2i32, we're being called by type legalization and we
// should just let the default handling take care of it.
if (IndexVT == MVT::v2i32)
return SDValue();
// If we don't have VLX and neither the passthru nor the index is 512 bits,
// we need to widen until one is.
if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// Determine how much we need to widen by to get a 512-bit type.
unsigned Factor = std::min(512/VT.getSizeInBits(),
512/IndexVT.getSizeInBits());
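// e.g. v4f32 data with a v4i64 index gives Factor = min(512/128, 512/256)
// = 2, widening to v8f32 data and a 512-bit v8i64 index.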
unsigned NumElts = VT.getVectorNumElements() * Factor;
VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
Src = ExtendToType(Src, VT, DAG);
Index = ExtendToType(Index, IndexVT, DAG);
Mask = ExtendToType(Mask, MaskVT, DAG, true);
}
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
N->getMemoryVT(), N->getMemOperand());
}
static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
MVT VT = Op.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
SDValue Mask = N->getMask();
MVT MaskVT = Mask.getSimpleValueType();
SDValue PassThru = N->getPassThru();
SDLoc dl(Op);
// Handle AVX masked loads which don't support passthru other than 0.
if (MaskVT.getVectorElementType() != MVT::i1) {
// We also allow undef in the isel pattern.
if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode()))
return Op;
SDValue NewLoad = DAG.getMaskedLoad(
VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(),
N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(),
N->isExpandingLoad());
// Emit a blend.
SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
}
assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
"Expanding masked load is supported on AVX-512 target only!");
assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&
"Expanding masked load is supported for 32 and 64-bit types only!");
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked load op.");
assert((ScalarVT.getSizeInBits() >= 32 ||
(Subtarget.hasBWI() &&
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
"Unsupported masked load op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bits.
unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
PassThru = ExtendToType(PassThru, WideDataVT, DAG);
// Mask element has to be i1.
assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
"Unexpected mask type");
MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
SDValue NewLoad = DAG.getMaskedLoad(
WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
N->getExtensionType(), N->isExpandingLoad());
SDValue Extract =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0),
DAG.getIntPtrConstant(0, dl));
SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
SDValue DataToStore = N->getValue();
MVT VT = DataToStore.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
SDValue Mask = N->getMask();
SDLoc dl(Op);
assert((!N->isCompressingStore() || Subtarget.hasAVX512()) &&
"Expanding masked load is supported on AVX-512 target only!");
assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) &&
"Expanding masked load is supported for 32 and 64-bit types only!");
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked store op.");
assert((ScalarVT.getSizeInBits() >= 32 ||
(Subtarget.hasBWI() &&
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
"Unsupported masked store op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bits.
unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
// Mask element has to be i1.
assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
"Unexpected mask type");
MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
N->getOffset(), Mask, N->getMemoryVT(),
N->getMemOperand(), N->getAddressingMode(),
N->isTruncatingStore(), N->isCompressingStore());
}
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX2() &&
"MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only");
MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
SDValue PassThru = N->getPassThru();
MVT IndexVT = Index.getSimpleValueType();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
// If the index is v2i32, we're being called by type legalization.
if (IndexVT == MVT::v2i32)
return SDValue();
// If we don't have VLX and neither the passthru nor the index is 512 bits,
// we need to widen until one is.
MVT OrigVT = VT;
if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
!IndexVT.is512BitVector()) {
// Determine how much we need to widen by to get a 512-bit type.
unsigned Factor = std::min(512/VT.getSizeInBits(),
512/IndexVT.getSizeInBits());
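// e.g. v2f32 data with a v2i64 index gives Factor = min(512/64, 512/128)
// = 4, widening to v8f32 data and a 512-bit v8i64 index.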
unsigned NumElts = VT.getVectorNumElements() * Factor;
VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
PassThru = ExtendToType(PassThru, VT, DAG);
Index = ExtendToType(Index, IndexVT, DAG);
Mask = ExtendToType(Mask, MaskVT, DAG, true);
}
// Break dependency on the data register.
if (PassThru.isUndef())
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
N->getScale() };
SDValue NewGather = DAG.getMemIntrinsicNode(
X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(),
N->getMemOperand());
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT,
NewGather, DAG.getIntPtrConstant(0, dl));
return DAG.getMergeValues({Extract, NewGather.getValue(1)}, dl);
}
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
SDValue Src = Op.getOperand(0);
MVT DstVT = Op.getSimpleValueType();
AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
unsigned SrcAS = N->getSrcAddressSpace();
assert(SrcAS != N->getDestAddressSpace() &&
"addrspacecast must be between different address spaces");
if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64) {
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src);
} else if (DstVT == MVT::i64) {
Op = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src);
} else if (DstVT == MVT::i32) {
Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
} else {
report_fatal_error("Bad address space in addrspacecast");
}
return Op;
}
SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op,
SelectionDAG &DAG) const {
// TODO: Eventually, the lowering of these nodes should be informed by or
// deferred to the GC strategy for the function in which they appear. For
// now, however, they must be lowered to something. Since they are logically
// no-ops in the case of a null GC strategy (or a GC strategy which does not
// require special handling for these nodes), lower them as literal NOOPs for
// the time being.
SmallVector<SDValue, 2> Ops;
Ops.push_back(Op.getOperand(0));
if (Op->getGluedNode())
Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
return SDValue(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
}
// Custom split CVTPS2PH with wide types.
static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
EVT VT = Op.getValueType();
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
SDValue RC = Op.getOperand(1);
Lo = DAG.getNode(X86ISD::CVTPS2PH, dl, LoVT, Lo, RC);
Hi = DAG.getNode(X86ISD::CVTPS2PH, dl, HiVT, Hi, RC);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
}
+static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // We don't support non-data prefetch without PREFETCHI.
+ // Just preserve the chain.
+ if (!IsData && !Subtarget.hasPREFETCHI())
+ return Op.getOperand(0);
+
+ return Op;
+}
+
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
std::string OpNoStr = llvm::toString(Operand, 10, false);
std::string Str(" $");
std::string OpNoStr1(Str + OpNoStr); // e.g. " $1" (OpNo=1)
std::string OpNoStr2(Str + "{" + OpNoStr + ":"); // With modifier, e.g. ${1:P}
auto I = StringRef::npos;
for (auto &AsmStr : AsmStrs) {
// Match the OpNo string. We must match exactly to avoid matching a
// substring, e.g. "$12" contains "$1".
if (AsmStr.endswith(OpNoStr1))
I = AsmStr.size() - OpNoStr1.size();
// Get the index of operand in AsmStr.
if (I == StringRef::npos)
I = AsmStr.find(OpNoStr1 + ",");
if (I == StringRef::npos)
I = AsmStr.find(OpNoStr2);
if (I == StringRef::npos)
continue;
assert(I > 0 && "Unexpected inline asm string!");
// Remove the operand string and label (if it exists).
// For example:
// ".L__MSASMLABEL_.${:uid}__l:call dword ptr ${0:P}"
// ==>
// ".L__MSASMLABEL_.${:uid}__l:call dword ptr "
// ==>
// "call dword ptr "
auto TmpStr = AsmStr.substr(0, I);
I = TmpStr.rfind(':');
if (I == StringRef::npos)
return TmpStr;
assert(I < TmpStr.size() && "Unexpected inline asm string!");
auto Asm = TmpStr.drop_front(I + 1);
return Asm;
}
return StringRef();
}
bool X86TargetLowering::isInlineAsmTargetBranch(
const SmallVectorImpl<StringRef> &AsmStrs, unsigned OpNo) const {
StringRef InstrStr = getInstrStrFromOpNo(AsmStrs, OpNo);
if (InstrStr.contains("call"))
return true;
return false;
}
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget);
case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
case ISD::PARITY: return LowerPARITY(Op, Subtarget, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG);
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP16_TO_FP:
case ISD::STRICT_FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG);
case ISD::FP_TO_FP16:
case ISD::STRICT_FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
case ISD::FP_TO_BF16: return LowerFP_TO_BF16(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
case ISD::FADD:
case ISD::FSUB: return lowerFaddFsub(Op, DAG);
case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::LRINT:
case ISD::LLRINT: return LowerLRINT_LLRINT(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG);
case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH:
return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
case ISD::ROTL:
case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO: return LowerXALUO(Op, DAG);
case ISD::SMULO:
case ISD::UMULO: return LowerMULO(Op, Subtarget, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget);
case ISD::UADDSAT:
case ISD::SADDSAT:
case ISD::USUBSAT:
case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG, Subtarget);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, Subtarget, DAG);
case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
case ISD::AVGCEILU: return LowerAVG(Op, Subtarget, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
case ISD::GC_TRANSITION_START:
case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION(Op, DAG);
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
}
}
/// Replace a node with an illegal result type with a new node built out of
/// custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ReplaceNodeResults: ";
N->dump(&DAG);
#endif
llvm_unreachable("Do not know how to custom type legalize this operation!");
case X86ISD::CVTPH2PS: {
EVT VT = N->getValueType(0);
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
Lo = DAG.getNode(X86ISD::CVTPH2PS, dl, LoVT, Lo);
Hi = DAG.getNode(X86ISD::CVTPH2PS, dl, HiVT, Hi);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
Results.push_back(Res);
return;
}
case X86ISD::STRICT_CVTPH2PS: {
EVT VT = N->getValueType(0);
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 1);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
Lo = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {LoVT, MVT::Other},
{N->getOperand(0), Lo});
Hi = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {HiVT, MVT::Other},
{N->getOperand(0), Hi});
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
Results.push_back(Res);
Results.push_back(Chain);
return;
}
case X86ISD::CVTPS2PH:
Results.push_back(LowerCVTPS2PH(SDValue(N, 0), DAG));
return;
case ISD::CTPOP: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
// Use a v2i64 if possible.
bool NoImplicitFloatOps =
DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
if (isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) {
SDValue Wide =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0));
Wide = DAG.getNode(ISD::CTPOP, dl, MVT::v2i64, Wide);
// Bit count should fit in 32-bits, extract it as that and then zero
// extend to i64. Otherwise we end up extracting bits 63:32 separately.
Wide = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Wide);
Wide = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Wide,
DAG.getIntPtrConstant(0, dl));
Wide = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Wide);
Results.push_back(Wide);
}
return;
}
case ISD::MUL: {
EVT VT = N->getValueType(0);
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
VT.getVectorElementType() == MVT::i8 && "Unexpected VT!");
// Pre-promote these to vXi16 to avoid op legalization thinking all 16
// elements are needed.
MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
unsigned NumConcats = 16 / VT.getVectorNumElements();
SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
ConcatOps[0] = Res;
Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
Results.push_back(Res);
return;
}
case ISD::SMULO:
case ISD::UMULO: {
EVT VT = N->getValueType(0);
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
VT == MVT::v2i32 && "Unexpected VT!");
bool IsSigned = N->getOpcode() == ISD::SMULO;
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Op0 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(0));
SDValue Op1 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(1));
SDValue Res = DAG.getNode(ISD::MUL, dl, MVT::v2i64, Op0, Op1);
// Extract the high 32 bits from each result using PSHUFD.
// TODO: Could use SRL+TRUNCATE but that doesn't become a PSHUFD.
SDValue Hi = DAG.getBitcast(MVT::v4i32, Res);
Hi = DAG.getVectorShuffle(MVT::v4i32, dl, Hi, Hi, {1, 3, -1, -1});
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Hi,
DAG.getIntPtrConstant(0, dl));
// Truncate the low bits of the result. This will become PSHUFD.
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
SDValue HiCmp;
if (IsSigned) {
// SMULO overflows if the high bits don't match the sign of the low.
HiCmp = DAG.getNode(ISD::SRA, dl, VT, Res, DAG.getConstant(31, dl, VT));
} else {
// UMULO overflows if the high bits are non-zero.
HiCmp = DAG.getConstant(0, dl, VT);
}
SDValue Ovf = DAG.getSetCC(dl, N->getValueType(1), Hi, HiCmp, ISD::SETNE);
// Widen the result by padding with undef.
Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res,
DAG.getUNDEF(VT));
Results.push_back(Res);
Results.push_back(Ovf);
return;
}
case X86ISD::VPMADDWD: {
// Legalize types for X86ISD::VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT VT = N->getValueType(0);
EVT InVT = N->getOperand(0).getValueType();
assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
"Expected a VT that divides into 128 bits.");
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
unsigned NumConcat = 128 / InVT.getSizeInBits();
EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(),
NumConcat * InVT.getVectorNumElements());
EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
VT.getVectorElementType(),
NumConcat * VT.getVectorNumElements());
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = N->getOperand(0);
SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
Ops[0] = N->getOperand(1);
SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
Results.push_back(Res);
return;
}
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
case X86ISD::FMAXC:
case X86ISD::FMAX: {
EVT VT = N->getValueType(0);
assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
SDValue UNDEF = DAG.getUNDEF(VT);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(0), UNDEF);
SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(1), UNDEF);
Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));
return;
}
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: {
EVT VT = N->getValueType(0);
if (VT.isVector()) {
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
// If this RHS is a constant splat vector we can widen this and let
// division/remainder by constant optimize it.
// TODO: Can we do something for non-splat?
APInt SplatVal;
if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) {
unsigned NumConcats = 128 / VT.getSizeInBits();
SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT));
Ops0[0] = N->getOperand(0);
EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT);
SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0);
SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT);
SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1);
Results.push_back(Res);
}
return;
}
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);
return;
}
case ISD::TRUNCATE: {
MVT VT = N->getSimpleValueType(0);
if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
return;
// The generic legalizer will try to widen the input type to the same
// number of elements as the widened result type. But this isn't always
// the best thing, so do some custom legalization to avoid some cases.
MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
unsigned InBits = InVT.getSizeInBits();
if (128 % InBits == 0) {
// 128-bit and smaller inputs should avoid truncation altogether and
// just use a build_vector that will become a shuffle.
// TODO: Widen and use a shuffle directly?
MVT InEltVT = InVT.getSimpleVT().getVectorElementType();
EVT EltVT = VT.getVectorElementType();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = VT.getVectorNumElements();
for (unsigned i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In,
DAG.getIntPtrConstant(i, dl));
Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val);
}
Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops));
return;
}
// With AVX512 there are some cases that can use a target specific
// truncate node to go from 256/512 to less than 128 with zeros in the
// upper elements of the 128 bit result.
if (Subtarget.hasAVX512() && isTypeLegal(InVT)) {
// We can use VTRUNC directly for 256 bits with VLX, or for any 512-bit input.
if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) {
Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
return;
}
// There's one case we can widen to 512 bits and use VTRUNC.
if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) {
In = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i64, In,
DAG.getUNDEF(MVT::v4i64));
Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
return;
}
}
if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 &&
getTypeAction(*DAG.getContext(), InVT) == TypeSplitVector &&
isTypeLegal(MVT::v4i64)) {
// The input needs to be split and the output needs to be widened. Let's
// use two VTRUNCs, and shuffle their results together into the wider type.
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(In, dl);
Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo);
Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi);
SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi,
{ 0, 1, 2, 3, 16, 17, 18, 19,
-1, -1, -1, -1, -1, -1, -1, -1 });
Results.push_back(Res);
return;
}
return;
}
case ISD::ANY_EXTEND:
// Right now, only MVT::v8i8 has Custom action for an illegal type.
// It's intended to custom handle the input type.
assert(N->getValueType(0) == MVT::v8i8 &&
"Do not know how to legalize this Node");
return;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND: {
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
(InVT == MVT::v4i16 || InVT == MVT::v4i8)){
assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector &&
"Unexpected type action!");
assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode");
// Custom split this so we can extend i8/i16->i32 invec. This is better
// since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
// sra. Then extending from i32 to i64 using pcmpgt. By custom splitting
// we allow the sra from the extend to i32 to be shared by the split.
In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
// Fill a vector with sign bits for each element.
SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT);
// Create an unpackl and unpackh to interleave the sign bits then bitcast
// to v2i64.
SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
{0, 4, 1, 5});
Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo);
SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
{2, 6, 3, 7});
Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
Results.push_back(Res);
return;
}
if (VT == MVT::v16i32 || VT == MVT::v8i64) {
if (!InVT.is128BitVector()) {
// Not a 128 bit vector, but maybe type legalization will promote
// it to 128 bits.
if (getTypeAction(*DAG.getContext(), InVT) != TypePromoteInteger)
return;
InVT = getTypeToTransformTo(*DAG.getContext(), InVT);
if (!InVT.is128BitVector())
return;
// Promote the input to 128 bits. Type legalization will turn this into
// zext_inreg/sext_inreg.
In = DAG.getNode(N->getOpcode(), dl, InVT, In);
}
// Perform custom splitting instead of the two stage extend we would get
// by default.
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
assert(isTypeLegal(LoVT) && "Split VT not legal?");
SDValue Lo = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, LoVT, In, DAG);
// We need to shift the input over by half the number of elements.
unsigned NumElts = InVT.getVectorNumElements();
unsigned HalfNumElts = NumElts / 2;
SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef);
for (unsigned i = 0; i != HalfNumElts; ++i)
ShufMask[i] = i + HalfNumElts;
SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
Hi = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, HiVT, Hi, DAG);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
Results.push_back(Res);
}
return;
}
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT: {
bool IsStrict = N->isStrictFPOpcode();
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::STRICT_FP_TO_SINT;
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
EVT SrcVT = Src.getValueType();
SDValue Res;
if (isSoftFP16(SrcVT)) {
EVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
if (IsStrict) {
Res =
DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},
{Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
{NVT, MVT::Other}, {Chain, Src})});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(N->getOpcode(), dl, VT,
DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src));
}
Results.push_back(Res);
if (IsStrict)
Results.push_back(Chain);
return;
}
if (VT.isVector() && Subtarget.hasFP16() &&
SrcVT.getVectorElementType() == MVT::f16) {
EVT EleVT = VT.getVectorElementType();
EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
if (SrcVT != MVT::v8f16) {
SDValue Tmp =
IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
Ops[0] = Src;
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
}
if (IsStrict) {
unsigned Opc =
IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
Res =
DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src});
Chain = Res.getValue(1);
} else {
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
Res = DAG.getNode(Opc, dl, ResVT, Src);
}
// TODO: Need to add exception check code for strict FP.
if (EleVT.getSizeInBits() < 16) {
MVT TmpVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8);
Res = DAG.getNode(ISD::TRUNCATE, dl, TmpVT, Res);
// Now widen to 128 bits.
unsigned NumConcats = 128 / TmpVT.getSizeInBits();
MVT ConcatVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8 * NumConcats);
SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(TmpVT));
ConcatOps[0] = Res;
Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
}
Results.push_back(Res);
if (IsStrict)
Results.push_back(Chain);
return;
}
if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
// Try to create a 128 bit vector, but don't exceed a 32 bit element.
unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
VT.getVectorNumElements());
SDValue Res;
SDValue Chain;
if (IsStrict) {
Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {PromoteVT, MVT::Other},
{N->getOperand(0), Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
// Preserve what we know about the size of the original result. If the
// result is v2i32, we have to manually widen the assert.
if (PromoteVT == MVT::v2i32)
Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res,
DAG.getUNDEF(MVT::v2i32));
Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl,
Res.getValueType(), Res,
DAG.getValueType(VT.getVectorElementType()));
if (PromoteVT == MVT::v2i32)
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
DAG.getIntPtrConstant(0, dl));
// Truncate back to the original width.
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
// Now widen to 128 bits.
unsigned NumConcats = 128 / VT.getSizeInBits();
MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
VT.getVectorNumElements() * NumConcats);
SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
ConcatOps[0] = Res;
Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
Results.push_back(Res);
if (IsStrict)
Results.push_back(Chain);
return;
}
if (VT == MVT::v2i32) {
assert((!IsStrict || IsSigned || Subtarget.hasAVX512()) &&
"Strict unsigned conversion requires AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
if (Src.getValueType() == MVT::v2f64) {
if (!IsSigned && !Subtarget.hasAVX512()) {
SDValue Res =
expandFP_TO_UINT_SSE(MVT::v4i32, Src, dl, DAG, Subtarget);
Results.push_back(Res);
return;
}
unsigned Opc;
if (IsStrict)
Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
else
Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
// If we have VLX we can emit a target-specific FP_TO_UINT node.
if (!IsSigned && !Subtarget.hasVLX()) {
// Otherwise we can defer to the generic legalizer which will widen
// the input as well. This will be further widened during op
// legalization to v8i32<-v8f64.
// For strict nodes we'll need to widen ourselves.
// FIXME: Fix the type legalizer to safely widen strict nodes?
if (!IsStrict)
return;
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src,
DAG.getConstantFP(0.0, dl, MVT::v2f64));
Opc = N->getOpcode();
}
SDValue Res;
SDValue Chain;
if (IsStrict) {
Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other},
{N->getOperand(0), Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
}
Results.push_back(Res);
if (IsStrict)
Results.push_back(Chain);
return;
}
// Custom widen strict v2f32->v2i32 by padding with zeros.
// FIXME: Should generic type legalizer do this?
if (Src.getValueType() == MVT::v2f32 && IsStrict) {
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getConstantFP(0.0, dl, MVT::v2f32));
SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other},
{N->getOperand(0), Src});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
return;
}
// The FP_TO_INTHelper below only handles f32/f64/f80 scalar inputs,
// so early out here.
return;
}
assert(!VT.isVector() && "Vectors should have been handled above!");
if ((Subtarget.hasDQI() && VT == MVT::i64 &&
(SrcVT == MVT::f32 || SrcVT == MVT::f64)) ||
(Subtarget.hasFP16() && SrcVT == MVT::f16)) {
assert(!Subtarget.is64Bit() && "i64 should be legal");
unsigned NumElts = Subtarget.hasVLX() ? 2 : 8;
// If we use a 128-bit result we might need to use a target specific node.
unsigned SrcElts =
std::max(NumElts, 128U / (unsigned)SrcVT.getSizeInBits());
MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts);
MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), SrcElts);
unsigned Opc = N->getOpcode();
if (NumElts != SrcElts) {
if (IsStrict)
Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
else
Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
}
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
DAG.getConstantFP(0.0, dl, VecInVT), Src,
ZeroIdx);
SDValue Chain;
if (IsStrict) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res);
Chain = Res.getValue(1);
} else
Res = DAG.getNode(Opc, SDLoc(N), VecVT, Res);
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx);
Results.push_back(Res);
if (IsStrict)
Results.push_back(Chain);
return;
}
if (VT == MVT::i128 && Subtarget.isTargetWin64()) {
SDValue Chain;
SDValue V = LowerWin64_FP_TO_INT128(SDValue(N, 0), DAG, Chain);
Results.push_back(V);
if (IsStrict)
Results.push_back(Chain);
return;
}
if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) {
Results.push_back(V);
if (IsStrict)
Results.push_back(Chain);
}
return;
}
case ISD::LRINT:
case ISD::LLRINT: {
if (SDValue V = LRINT_LLRINTHelper(N, DAG))
Results.push_back(V);
return;
}
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP: {
bool IsStrict = N->isStrictFPOpcode();
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
N->getOpcode() == ISD::STRICT_SINT_TO_FP;
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() &&
Subtarget.hasVLX()) {
if (Src.getValueType().getVectorElementType() == MVT::i16)
return;
if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
IsStrict ? DAG.getConstant(0, dl, MVT::v2i32)
: DAG.getUNDEF(MVT::v2i32));
if (IsStrict) {
unsigned Opc =
IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P;
SDValue Res = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
{N->getOperand(0), Src});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
} else {
unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P;
Results.push_back(DAG.getNode(Opc, dl, MVT::v8f16, Src));
}
return;
}
if (VT != MVT::v2f32)
return;
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
if (IsStrict) {
unsigned Opc = IsSigned ? X86ISD::STRICT_CVTSI2P
: X86ISD::STRICT_CVTUI2P;
SDValue Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other},
{N->getOperand(0), Src});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
} else {
unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P;
Results.push_back(DAG.getNode(Opc, dl, MVT::v4f32, Src));
}
return;
}
if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() &&
Subtarget.hasSSE41() && !Subtarget.hasAVX512()) {
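// For elements with the sign bit set, halve the value but OR the low bit
// back in (round-to-odd, so the final float rounding is still correct),
// convert as signed, then double the result with an FADD; a select picks
// this slow path per element.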
SDValue Zero = DAG.getConstant(0, dl, SrcVT);
SDValue One = DAG.getConstant(1, dl, SrcVT);
SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT,
DAG.getNode(ISD::SRL, dl, SrcVT, Src, One),
DAG.getNode(ISD::AND, dl, SrcVT, Src, One));
SDValue IsNeg = DAG.getSetCC(dl, MVT::v2i64, Src, Zero, ISD::SETLT);
SDValue SignSrc = DAG.getSelect(dl, SrcVT, IsNeg, Sign, Src);
SmallVector<SDValue, 4> SignCvts(4, DAG.getConstantFP(0.0, dl, MVT::f32));
for (int i = 0; i != 2; ++i) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
SignSrc, DAG.getIntPtrConstant(i, dl));
if (IsStrict)
SignCvts[i] =
DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {MVT::f32, MVT::Other},
{N->getOperand(0), Elt});
else
SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Elt);
}
SDValue SignCvt = DAG.getBuildVector(MVT::v4f32, dl, SignCvts);
SDValue Slow, Chain;
if (IsStrict) {
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
SignCvts[0].getValue(1), SignCvts[1].getValue(1));
Slow = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v4f32, MVT::Other},
{Chain, SignCvt, SignCvt});
Chain = Slow.getValue(1);
} else {
Slow = DAG.getNode(ISD::FADD, dl, MVT::v4f32, SignCvt, SignCvt);
}
IsNeg = DAG.getBitcast(MVT::v4i32, IsNeg);
IsNeg =
DAG.getVectorShuffle(MVT::v4i32, dl, IsNeg, IsNeg, {1, 3, -1, -1});
SDValue Cvt = DAG.getSelect(dl, MVT::v4f32, IsNeg, Slow, SignCvt);
Results.push_back(Cvt);
if (IsStrict)
Results.push_back(Chain);
return;
}
if (SrcVT != MVT::v2i32)
return;
if (IsSigned || Subtarget.hasAVX512()) {
if (!IsStrict)
return;
// Custom widen strict v2i32->v2f32 to avoid scalarization.
// FIXME: Should generic type legalizer do this?
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getConstant(0, dl, MVT::v2i32));
SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
{N->getOperand(0), Src});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
return;
}
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
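// Standard uint32->double trick: 0x4330000000000000 is the double 2^52.
// OR'ing the zero-extended value into the mantissa of 2^52 yields
// 2^52 + x exactly; subtracting 2^52 recovers x as a double, which is
// then rounded to v4f32.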
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
SDValue VBias =
DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
if (IsStrict) {
SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
{N->getOperand(0), Or, VBias});
SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl,
{MVT::v4f32, MVT::Other},
{Sub.getValue(1), Sub});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
} else {
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
}
return;
}
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
SDValue Rnd = N->getOperand(IsStrict ? 2 : 1);
EVT SrcVT = Src.getValueType();
EVT VT = N->getValueType(0);
SDValue V;
if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32)
: DAG.getUNDEF(MVT::v2f32);
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext);
}
if (!Subtarget.hasFP16() && VT.getVectorElementType() == MVT::f16) {
assert(Subtarget.hasF16C() && "Cannot widen f16 without F16C");
if (SrcVT.getVectorElementType() != MVT::f32)
return;
if (IsStrict)
V = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other},
{Chain, Src, Rnd});
else
V = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Src, Rnd);
Results.push_back(DAG.getBitcast(MVT::v8f16, V));
if (IsStrict)
Results.push_back(V.getValue(1));
return;
}
if (!isTypeLegal(Src.getValueType()))
return;
EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;
if (IsStrict)
V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other},
{Chain, Src});
else
V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src);
Results.push_back(V);
if (IsStrict)
Results.push_back(V.getValue(1));
return;
}
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND: {
// Right now, only MVT::v2f32 has OperationAction for FP_EXTEND.
// No other ValueType for FP_EXTEND should reach this point.
assert(N->getValueType(0) == MVT::v2f32 &&
"Do not know how to legalize this Node");
if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
return;
bool IsStrict = N->isStrictFPOpcode();
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16)
: DAG.getUNDEF(MVT::v2f16);
SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext);
if (IsStrict)
V = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::v4f32, MVT::Other},
{N->getOperand(0), V});
else
V = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, V);
Results.push_back(V);
if (IsStrict)
Results.push_back(V.getValue(1));
return;
}
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
default : llvm_unreachable("Do not know how to custom type "
"legalize this intrinsic operation!");
case Intrinsic::x86_rdtsc:
return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget,
Results);
case Intrinsic::x86_rdtscp:
return getReadTimeStampCounter(N, dl, X86::RDTSCP, DAG, Subtarget,
Results);
case Intrinsic::x86_rdpmc:
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget,
Results);
return;
case Intrinsic::x86_rdpru:
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget,
Results);
return;
case Intrinsic::x86_xgetbv:
expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
Results);
return;
}
}
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results);
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
assert((!Regs64bit || Subtarget.canUseCMPXCHG16B()) &&
"64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");
MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
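// CMPXCHG8B/CMPXCHG16B expect the comparand in EDX:EAX (RDX:RAX) and the
// replacement in ECX:EBX (RCX:RBX); ZF in EFLAGS reports success.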
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(0, dl, HalfT));
cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(1, dl, HalfT));
cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
cpInL, SDValue());
cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
Regs64bit ? X86::RDX : X86::EDX,
cpInH, cpInL.getValue(1));
SDValue swapInL, swapInH;
swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(0, dl, HalfT));
swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(1, dl, HalfT));
swapInH =
DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX,
swapInH, cpInH.getValue(1));
// In 64-bit mode we might need the base pointer in RBX, but we can't know
// until later. So we keep the RBX input in a vreg and use a custom
// inserter.
// Since RBX will be a reserved register, the register allocator will not
// ensure its value is properly saved and restored around this live range.
SDValue Result;
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
if (Regs64bit) {
SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL,
swapInH.getValue(1)};
Result =
DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_DAG, dl, Tys, Ops, T, MMO);
} else {
swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, X86::EBX, swapInL,
swapInH.getValue(1));
SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1),
swapInL.getValue(1)};
Result =
DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, T, MMO);
}
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
HalfT, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
Regs64bit ? X86::RDX : X86::EDX,
HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
MVT::i32, cpOutH.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG);
Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
Results.push_back(Success);
Results.push_back(EFLAGS.getValue(1));
return;
}
case ISD::ATOMIC_LOAD: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
bool NoImplicitFloatOps =
DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
auto *Node = cast<AtomicSDNode>(N);
if (Subtarget.hasSSE1()) {
// Use a VZEXT_LOAD which will be selected as MOVQ or XORPS+MOVLPS.
// Then extract the lower 64-bits.
MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
SDVTList Tys = DAG.getVTList(LdVT, MVT::Other);
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
MVT::i64, Node->getMemOperand());
if (Subtarget.hasSSE2()) {
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
Results.push_back(Ld.getValue(1));
return;
}
// We use an alternative sequence for SSE1 that extracts as v2f32 and
// then casts to i64. This avoids a 128-bit stack temporary being
// created by type legalization if we were to cast v4f32->v2i64.
SDValue Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Ld,
DAG.getIntPtrConstant(0, dl));
Res = DAG.getBitcast(MVT::i64, Res);
Results.push_back(Res);
Results.push_back(Ld.getValue(1));
return;
}
if (Subtarget.hasX87()) {
// First load this into an 80-bit X87 register. This will put the whole
// integer into the significand.
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD,
dl, Tys, Ops, MVT::i64,
Node->getMemOperand());
SDValue Chain = Result.getValue(1);
// Now store the X87 register to a stack temporary and convert to i64.
// This store is not atomic and doesn't need to be.
// FIXME: We don't need a stack temporary if the result of the load
// is already being stored. We could just directly store there.
SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
SDValue StoreOps[] = { Chain, Result, StackPtr };
Chain = DAG.getMemIntrinsicNode(
X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
MPI, std::nullopt /*Align*/, MachineMemOperand::MOStore);
// Finally load the value back from the stack temporary and return it.
// This load is not atomic and doesn't need to be.
// This load will be further type legalized.
Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
Results.push_back(Result);
Results.push_back(Result.getValue(1));
return;
}
}
// TODO: Use MOVLPS when SSE1 is available?
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
}
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
case ISD::BITCAST: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);
EVT SrcVT = N->getOperand(0).getValueType();
// If this is a bitcast from a v64i1 k-register to a i64 on a 32-bit target
// we can split using the k-register rather than memory.
if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) {
assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
Lo = DAG.getBitcast(MVT::i32, Lo);
Hi = DAG.getBitcast(MVT::i32, Hi);
SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
Results.push_back(Res);
return;
}
if (DstVT.isVector() && SrcVT == MVT::x86mmx) {
// FIXME: Use v4f32 for SSE1?
assert(Subtarget.hasSSE2() && "Requires SSE2");
assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector &&
"Unexpected type action!");
EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT);
SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64,
N->getOperand(0));
Res = DAG.getBitcast(WideVT, Res);
Results.push_back(Res);
return;
}
return;
}
case ISD::MGATHER: {
EVT VT = N->getValueType(0);
if ((VT == MVT::v2f32 || VT == MVT::v2i32) &&
(Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
auto *Gather = cast<MaskedGatherSDNode>(N);
SDValue Index = Gather->getIndex();
if (Index.getValueType() != MVT::v2i64)
return;
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT,
Gather->getPassThru(),
DAG.getUNDEF(VT));
if (!Subtarget.hasVLX()) {
// We need to widen the mask, but the instruction will only use 2
// of its elements. So we can use undef.
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getMemIntrinsicNode(
X86ISD::MGATHER, dl, DAG.getVTList(WideVT, MVT::Other), Ops,
Gather->getMemoryVT(), Gather->getMemOperand());
Results.push_back(Res);
Results.push_back(Res.getValue(1));
return;
}
return;
}
case ISD::LOAD: {
// Use an f64/i64 load and a scalar_to_vector for v2f32/v2i32 loads. This
// avoids scalarizing in 32-bit mode. In 64-bit mode this avoids an int->fp
// cast since type legalization will try to use an i64 load.
MVT VT = N->getSimpleValueType(0);
assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
if (!ISD::isNON_EXTLoad(N))
return;
auto *Ld = cast<LoadSDNode>(N);
if (Subtarget.hasSSE2()) {
MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
SDValue Chain = Res.getValue(1);
MVT VecVT = MVT::getVectorVT(LdVT, 2);
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res);
EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
Res = DAG.getBitcast(WideVT, Res);
Results.push_back(Res);
Results.push_back(Chain);
return;
}
assert(Subtarget.hasSSE1() && "Expected SSE");
SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
MVT::i64, Ld->getMemOperand());
Results.push_back(Res);
Results.push_back(Res.getValue(1));
return;
}
case ISD::ADDRSPACECAST: {
SDValue V = LowerADDRSPACECAST(SDValue(N,0), DAG);
Results.push_back(V);
return;
}
case ISD::BITREVERSE: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
assert(Subtarget.hasXOP() && "Expected XOP");
// We can use VPPERM by copying to a vector register and back. We'll need
// to move the scalar in two i32 pieces.
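// (VPPERM's per-byte selector has a modifier that bit-reverses the chosen
// source byte, so the whole reversal is a single shuffle plus the moves.)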
Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
return;
}
case ISD::EXTRACT_VECTOR_ELT: {
// f16 = extract vXf16 %vec, i64 %idx
assert(N->getSimpleValueType(0) == MVT::f16 &&
"Unexpected Value type of EXTRACT_VECTOR_ELT!");
assert(Subtarget.hasFP16() && "Expected FP16");
SDValue VecOp = N->getOperand(0);
EVT ExtVT = VecOp.getValueType().changeVectorElementTypeToInteger();
SDValue Split = DAG.getBitcast(ExtVT, N->getOperand(0));
Split = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Split,
N->getOperand(1));
Split = DAG.getBitcast(MVT::f16, Split);
Results.push_back(Split);
return;
}
}
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((X86ISD::NodeType)Opcode) {
case X86ISD::FIRST_NUMBER: break;
#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
NODE_NAME_CASE(BSF)
NODE_NAME_CASE(BSR)
NODE_NAME_CASE(FSHL)
NODE_NAME_CASE(FSHR)
NODE_NAME_CASE(FAND)
NODE_NAME_CASE(FANDN)
NODE_NAME_CASE(FOR)
NODE_NAME_CASE(FXOR)
NODE_NAME_CASE(FILD)
NODE_NAME_CASE(FIST)
NODE_NAME_CASE(FP_TO_INT_IN_MEM)
NODE_NAME_CASE(FLD)
NODE_NAME_CASE(FST)
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(CALL_RVMARKER)
NODE_NAME_CASE(BT)
NODE_NAME_CASE(CMP)
NODE_NAME_CASE(FCMP)
NODE_NAME_CASE(STRICT_FCMP)
NODE_NAME_CASE(STRICT_FCMPS)
NODE_NAME_CASE(COMI)
NODE_NAME_CASE(UCOMI)
NODE_NAME_CASE(CMPM)
NODE_NAME_CASE(CMPMM)
NODE_NAME_CASE(STRICT_CMPM)
NODE_NAME_CASE(CMPMM_SAE)
NODE_NAME_CASE(SETCC)
NODE_NAME_CASE(SETCC_CARRY)
NODE_NAME_CASE(FSETCC)
NODE_NAME_CASE(FSETCCM)
NODE_NAME_CASE(FSETCCM_SAE)
NODE_NAME_CASE(CMOV)
NODE_NAME_CASE(BRCOND)
NODE_NAME_CASE(RET_FLAG)
NODE_NAME_CASE(IRET)
NODE_NAME_CASE(REP_STOS)
NODE_NAME_CASE(REP_MOVS)
NODE_NAME_CASE(GlobalBaseReg)
NODE_NAME_CASE(Wrapper)
NODE_NAME_CASE(WrapperRIP)
NODE_NAME_CASE(MOVQ2DQ)
NODE_NAME_CASE(MOVDQ2Q)
NODE_NAME_CASE(MMX_MOVD2W)
NODE_NAME_CASE(MMX_MOVW2D)
NODE_NAME_CASE(PEXTRB)
NODE_NAME_CASE(PEXTRW)
NODE_NAME_CASE(INSERTPS)
NODE_NAME_CASE(PINSRB)
NODE_NAME_CASE(PINSRW)
NODE_NAME_CASE(PSHUFB)
NODE_NAME_CASE(ANDNP)
NODE_NAME_CASE(BLENDI)
NODE_NAME_CASE(BLENDV)
NODE_NAME_CASE(HADD)
NODE_NAME_CASE(HSUB)
NODE_NAME_CASE(FHADD)
NODE_NAME_CASE(FHSUB)
NODE_NAME_CASE(CONFLICT)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(FMAXS)
NODE_NAME_CASE(FMAX_SAE)
NODE_NAME_CASE(FMAXS_SAE)
NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(FMINS)
NODE_NAME_CASE(FMIN_SAE)
NODE_NAME_CASE(FMINS_SAE)
NODE_NAME_CASE(FMAXC)
NODE_NAME_CASE(FMINC)
NODE_NAME_CASE(FRSQRT)
NODE_NAME_CASE(FRCP)
NODE_NAME_CASE(EXTRQI)
NODE_NAME_CASE(INSERTQI)
NODE_NAME_CASE(TLSADDR)
NODE_NAME_CASE(TLSBASEADDR)
NODE_NAME_CASE(TLSCALL)
NODE_NAME_CASE(EH_SJLJ_SETJMP)
NODE_NAME_CASE(EH_SJLJ_LONGJMP)
NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
NODE_NAME_CASE(EH_RETURN)
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(FNSTCW16m)
NODE_NAME_CASE(FLDCW16m)
NODE_NAME_CASE(LCMPXCHG_DAG)
NODE_NAME_CASE(LCMPXCHG8_DAG)
NODE_NAME_CASE(LCMPXCHG16_DAG)
NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
NODE_NAME_CASE(LADD)
NODE_NAME_CASE(LSUB)
NODE_NAME_CASE(LOR)
NODE_NAME_CASE(LXOR)
NODE_NAME_CASE(LAND)
NODE_NAME_CASE(LBTS)
NODE_NAME_CASE(LBTC)
NODE_NAME_CASE(LBTR)
NODE_NAME_CASE(LBTS_RM)
NODE_NAME_CASE(LBTC_RM)
NODE_NAME_CASE(LBTR_RM)
NODE_NAME_CASE(AADD)
NODE_NAME_CASE(AOR)
NODE_NAME_CASE(AXOR)
NODE_NAME_CASE(AAND)
NODE_NAME_CASE(VZEXT_MOVL)
NODE_NAME_CASE(VZEXT_LOAD)
NODE_NAME_CASE(VEXTRACT_STORE)
NODE_NAME_CASE(VTRUNC)
NODE_NAME_CASE(VTRUNCS)
NODE_NAME_CASE(VTRUNCUS)
NODE_NAME_CASE(VMTRUNC)
NODE_NAME_CASE(VMTRUNCS)
NODE_NAME_CASE(VMTRUNCUS)
NODE_NAME_CASE(VTRUNCSTORES)
NODE_NAME_CASE(VTRUNCSTOREUS)
NODE_NAME_CASE(VMTRUNCSTORES)
NODE_NAME_CASE(VMTRUNCSTOREUS)
NODE_NAME_CASE(VFPEXT)
NODE_NAME_CASE(STRICT_VFPEXT)
NODE_NAME_CASE(VFPEXT_SAE)
NODE_NAME_CASE(VFPEXTS)
NODE_NAME_CASE(VFPEXTS_SAE)
NODE_NAME_CASE(VFPROUND)
NODE_NAME_CASE(STRICT_VFPROUND)
NODE_NAME_CASE(VMFPROUND)
NODE_NAME_CASE(VFPROUND_RND)
NODE_NAME_CASE(VFPROUNDS)
NODE_NAME_CASE(VFPROUNDS_RND)
NODE_NAME_CASE(VSHLDQ)
NODE_NAME_CASE(VSRLDQ)
NODE_NAME_CASE(VSHL)
NODE_NAME_CASE(VSRL)
NODE_NAME_CASE(VSRA)
NODE_NAME_CASE(VSHLI)
NODE_NAME_CASE(VSRLI)
NODE_NAME_CASE(VSRAI)
NODE_NAME_CASE(VSHLV)
NODE_NAME_CASE(VSRLV)
NODE_NAME_CASE(VSRAV)
NODE_NAME_CASE(VROTLI)
NODE_NAME_CASE(VROTRI)
NODE_NAME_CASE(VPPERM)
NODE_NAME_CASE(CMPP)
NODE_NAME_CASE(STRICT_CMPP)
NODE_NAME_CASE(PCMPEQ)
NODE_NAME_CASE(PCMPGT)
NODE_NAME_CASE(PHMINPOS)
NODE_NAME_CASE(ADD)
NODE_NAME_CASE(SUB)
NODE_NAME_CASE(ADC)
NODE_NAME_CASE(SBB)
NODE_NAME_CASE(SMUL)
NODE_NAME_CASE(UMUL)
NODE_NAME_CASE(OR)
NODE_NAME_CASE(XOR)
NODE_NAME_CASE(AND)
NODE_NAME_CASE(BEXTR)
NODE_NAME_CASE(BEXTRI)
NODE_NAME_CASE(BZHI)
NODE_NAME_CASE(PDEP)
NODE_NAME_CASE(PEXT)
NODE_NAME_CASE(MUL_IMM)
NODE_NAME_CASE(MOVMSK)
NODE_NAME_CASE(PTEST)
NODE_NAME_CASE(TESTP)
NODE_NAME_CASE(KORTEST)
NODE_NAME_CASE(KTEST)
NODE_NAME_CASE(KADD)
NODE_NAME_CASE(KSHIFTL)
NODE_NAME_CASE(KSHIFTR)
NODE_NAME_CASE(PACKSS)
NODE_NAME_CASE(PACKUS)
NODE_NAME_CASE(PALIGNR)
NODE_NAME_CASE(VALIGN)
NODE_NAME_CASE(VSHLD)
NODE_NAME_CASE(VSHRD)
NODE_NAME_CASE(VSHLDV)
NODE_NAME_CASE(VSHRDV)
NODE_NAME_CASE(PSHUFD)
NODE_NAME_CASE(PSHUFHW)
NODE_NAME_CASE(PSHUFLW)
NODE_NAME_CASE(SHUFP)
NODE_NAME_CASE(SHUF128)
NODE_NAME_CASE(MOVLHPS)
NODE_NAME_CASE(MOVHLPS)
NODE_NAME_CASE(MOVDDUP)
NODE_NAME_CASE(MOVSHDUP)
NODE_NAME_CASE(MOVSLDUP)
NODE_NAME_CASE(MOVSD)
NODE_NAME_CASE(MOVSS)
NODE_NAME_CASE(MOVSH)
NODE_NAME_CASE(UNPCKL)
NODE_NAME_CASE(UNPCKH)
NODE_NAME_CASE(VBROADCAST)
NODE_NAME_CASE(VBROADCAST_LOAD)
NODE_NAME_CASE(VBROADCASTM)
NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
NODE_NAME_CASE(VPERMILPV)
NODE_NAME_CASE(VPERMILPI)
NODE_NAME_CASE(VPERM2X128)
NODE_NAME_CASE(VPERMV)
NODE_NAME_CASE(VPERMV3)
NODE_NAME_CASE(VPERMI)
NODE_NAME_CASE(VPTERNLOG)
NODE_NAME_CASE(VFIXUPIMM)
NODE_NAME_CASE(VFIXUPIMM_SAE)
NODE_NAME_CASE(VFIXUPIMMS)
NODE_NAME_CASE(VFIXUPIMMS_SAE)
NODE_NAME_CASE(VRANGE)
NODE_NAME_CASE(VRANGE_SAE)
NODE_NAME_CASE(VRANGES)
NODE_NAME_CASE(VRANGES_SAE)
NODE_NAME_CASE(PMULUDQ)
NODE_NAME_CASE(PMULDQ)
NODE_NAME_CASE(PSADBW)
NODE_NAME_CASE(DBPSADBW)
NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
NODE_NAME_CASE(VAARG_64)
NODE_NAME_CASE(VAARG_X32)
NODE_NAME_CASE(DYN_ALLOCA)
NODE_NAME_CASE(MFENCE)
NODE_NAME_CASE(SEG_ALLOCA)
NODE_NAME_CASE(PROBED_ALLOCA)
NODE_NAME_CASE(RDRAND)
NODE_NAME_CASE(RDSEED)
NODE_NAME_CASE(RDPKRU)
NODE_NAME_CASE(WRPKRU)
NODE_NAME_CASE(VPMADDUBSW)
NODE_NAME_CASE(VPMADDWD)
NODE_NAME_CASE(VPSHA)
NODE_NAME_CASE(VPSHL)
NODE_NAME_CASE(VPCOM)
NODE_NAME_CASE(VPCOMU)
NODE_NAME_CASE(VPERMIL2)
NODE_NAME_CASE(FMSUB)
NODE_NAME_CASE(STRICT_FMSUB)
NODE_NAME_CASE(FNMADD)
NODE_NAME_CASE(STRICT_FNMADD)
NODE_NAME_CASE(FNMSUB)
NODE_NAME_CASE(STRICT_FNMSUB)
NODE_NAME_CASE(FMADDSUB)
NODE_NAME_CASE(FMSUBADD)
NODE_NAME_CASE(FMADD_RND)
NODE_NAME_CASE(FNMADD_RND)
NODE_NAME_CASE(FMSUB_RND)
NODE_NAME_CASE(FNMSUB_RND)
NODE_NAME_CASE(FMADDSUB_RND)
NODE_NAME_CASE(FMSUBADD_RND)
NODE_NAME_CASE(VFMADDC)
NODE_NAME_CASE(VFMADDC_RND)
NODE_NAME_CASE(VFCMADDC)
NODE_NAME_CASE(VFCMADDC_RND)
NODE_NAME_CASE(VFMULC)
NODE_NAME_CASE(VFMULC_RND)
NODE_NAME_CASE(VFCMULC)
NODE_NAME_CASE(VFCMULC_RND)
NODE_NAME_CASE(VFMULCSH)
NODE_NAME_CASE(VFMULCSH_RND)
NODE_NAME_CASE(VFCMULCSH)
NODE_NAME_CASE(VFCMULCSH_RND)
NODE_NAME_CASE(VFMADDCSH)
NODE_NAME_CASE(VFMADDCSH_RND)
NODE_NAME_CASE(VFCMADDCSH)
NODE_NAME_CASE(VFCMADDCSH_RND)
NODE_NAME_CASE(VPMADD52H)
NODE_NAME_CASE(VPMADD52L)
NODE_NAME_CASE(VRNDSCALE)
NODE_NAME_CASE(STRICT_VRNDSCALE)
NODE_NAME_CASE(VRNDSCALE_SAE)
NODE_NAME_CASE(VRNDSCALES)
NODE_NAME_CASE(VRNDSCALES_SAE)
NODE_NAME_CASE(VREDUCE)
NODE_NAME_CASE(VREDUCE_SAE)
NODE_NAME_CASE(VREDUCES)
NODE_NAME_CASE(VREDUCES_SAE)
NODE_NAME_CASE(VGETMANT)
NODE_NAME_CASE(VGETMANT_SAE)
NODE_NAME_CASE(VGETMANTS)
NODE_NAME_CASE(VGETMANTS_SAE)
NODE_NAME_CASE(PCMPESTR)
NODE_NAME_CASE(PCMPISTR)
NODE_NAME_CASE(XTEST)
NODE_NAME_CASE(COMPRESS)
NODE_NAME_CASE(EXPAND)
NODE_NAME_CASE(SELECTS)
NODE_NAME_CASE(ADDSUB)
NODE_NAME_CASE(RCP14)
NODE_NAME_CASE(RCP14S)
NODE_NAME_CASE(RCP28)
NODE_NAME_CASE(RCP28_SAE)
NODE_NAME_CASE(RCP28S)
NODE_NAME_CASE(RCP28S_SAE)
NODE_NAME_CASE(EXP2)
NODE_NAME_CASE(EXP2_SAE)
NODE_NAME_CASE(RSQRT14)
NODE_NAME_CASE(RSQRT14S)
NODE_NAME_CASE(RSQRT28)
NODE_NAME_CASE(RSQRT28_SAE)
NODE_NAME_CASE(RSQRT28S)
NODE_NAME_CASE(RSQRT28S_SAE)
NODE_NAME_CASE(FADD_RND)
NODE_NAME_CASE(FADDS)
NODE_NAME_CASE(FADDS_RND)
NODE_NAME_CASE(FSUB_RND)
NODE_NAME_CASE(FSUBS)
NODE_NAME_CASE(FSUBS_RND)
NODE_NAME_CASE(FMUL_RND)
NODE_NAME_CASE(FMULS)
NODE_NAME_CASE(FMULS_RND)
NODE_NAME_CASE(FDIV_RND)
NODE_NAME_CASE(FDIVS)
NODE_NAME_CASE(FDIVS_RND)
NODE_NAME_CASE(FSQRT_RND)
NODE_NAME_CASE(FSQRTS)
NODE_NAME_CASE(FSQRTS_RND)
NODE_NAME_CASE(FGETEXP)
NODE_NAME_CASE(FGETEXP_SAE)
NODE_NAME_CASE(FGETEXPS)
NODE_NAME_CASE(FGETEXPS_SAE)
NODE_NAME_CASE(SCALEF)
NODE_NAME_CASE(SCALEF_RND)
NODE_NAME_CASE(SCALEFS)
NODE_NAME_CASE(SCALEFS_RND)
NODE_NAME_CASE(MULHRS)
NODE_NAME_CASE(SINT_TO_FP_RND)
NODE_NAME_CASE(UINT_TO_FP_RND)
NODE_NAME_CASE(CVTTP2SI)
NODE_NAME_CASE(CVTTP2UI)
NODE_NAME_CASE(STRICT_CVTTP2SI)
NODE_NAME_CASE(STRICT_CVTTP2UI)
NODE_NAME_CASE(MCVTTP2SI)
NODE_NAME_CASE(MCVTTP2UI)
NODE_NAME_CASE(CVTTP2SI_SAE)
NODE_NAME_CASE(CVTTP2UI_SAE)
NODE_NAME_CASE(CVTTS2SI)
NODE_NAME_CASE(CVTTS2UI)
NODE_NAME_CASE(CVTTS2SI_SAE)
NODE_NAME_CASE(CVTTS2UI_SAE)
NODE_NAME_CASE(CVTSI2P)
NODE_NAME_CASE(CVTUI2P)
NODE_NAME_CASE(STRICT_CVTSI2P)
NODE_NAME_CASE(STRICT_CVTUI2P)
NODE_NAME_CASE(MCVTSI2P)
NODE_NAME_CASE(MCVTUI2P)
NODE_NAME_CASE(VFPCLASS)
NODE_NAME_CASE(VFPCLASSS)
NODE_NAME_CASE(MULTISHIFT)
NODE_NAME_CASE(SCALAR_SINT_TO_FP)
NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
NODE_NAME_CASE(SCALAR_UINT_TO_FP)
NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
NODE_NAME_CASE(CVTPS2PH)
NODE_NAME_CASE(STRICT_CVTPS2PH)
NODE_NAME_CASE(CVTPS2PH_SAE)
NODE_NAME_CASE(MCVTPS2PH)
NODE_NAME_CASE(MCVTPS2PH_SAE)
NODE_NAME_CASE(CVTPH2PS)
NODE_NAME_CASE(STRICT_CVTPH2PS)
NODE_NAME_CASE(CVTPH2PS_SAE)
NODE_NAME_CASE(CVTP2SI)
NODE_NAME_CASE(CVTP2UI)
NODE_NAME_CASE(MCVTP2SI)
NODE_NAME_CASE(MCVTP2UI)
NODE_NAME_CASE(CVTP2SI_RND)
NODE_NAME_CASE(CVTP2UI_RND)
NODE_NAME_CASE(CVTS2SI)
NODE_NAME_CASE(CVTS2UI)
NODE_NAME_CASE(CVTS2SI_RND)
NODE_NAME_CASE(CVTS2UI_RND)
NODE_NAME_CASE(CVTNE2PS2BF16)
NODE_NAME_CASE(CVTNEPS2BF16)
NODE_NAME_CASE(MCVTNEPS2BF16)
NODE_NAME_CASE(DPBF16PS)
NODE_NAME_CASE(LWPINS)
NODE_NAME_CASE(MGATHER)
NODE_NAME_CASE(MSCATTER)
NODE_NAME_CASE(VPDPBUSD)
NODE_NAME_CASE(VPDPBUSDS)
NODE_NAME_CASE(VPDPWSSD)
NODE_NAME_CASE(VPDPWSSDS)
NODE_NAME_CASE(VPSHUFBITQMB)
NODE_NAME_CASE(GF2P8MULB)
NODE_NAME_CASE(GF2P8AFFINEQB)
NODE_NAME_CASE(GF2P8AFFINEINVQB)
NODE_NAME_CASE(NT_CALL)
NODE_NAME_CASE(NT_BRIND)
NODE_NAME_CASE(UMWAIT)
NODE_NAME_CASE(TPAUSE)
NODE_NAME_CASE(ENQCMD)
NODE_NAME_CASE(ENQCMDS)
NODE_NAME_CASE(VP2INTERSECT)
NODE_NAME_CASE(VPDPBSUD)
NODE_NAME_CASE(VPDPBSUDS)
NODE_NAME_CASE(VPDPBUUD)
NODE_NAME_CASE(VPDPBUUDS)
NODE_NAME_CASE(VPDPBSSD)
NODE_NAME_CASE(VPDPBSSDS)
NODE_NAME_CASE(AESENC128KL)
NODE_NAME_CASE(AESDEC128KL)
NODE_NAME_CASE(AESENC256KL)
NODE_NAME_CASE(AESDEC256KL)
NODE_NAME_CASE(AESENCWIDE128KL)
NODE_NAME_CASE(AESDECWIDE128KL)
NODE_NAME_CASE(AESENCWIDE256KL)
NODE_NAME_CASE(AESDECWIDE256KL)
NODE_NAME_CASE(CMPCCXADD)
NODE_NAME_CASE(TESTUI)
NODE_NAME_CASE(FP80_ADD)
NODE_NAME_CASE(STRICT_FP80_ADD)
}
return nullptr;
#undef NODE_NAME_CASE
}
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I) const {
// X86 supports extremely general addressing modes.
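// The general form is Base + Index*Scale + Disp32 (optionally relative to
// a global), e.g. movl 16(%rdi,%rcx,4), %eax.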
CodeModel::Model M = getTargetMachine().getCodeModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr))
return false;
if (AM.BaseGV) {
unsigned GVFlags = Subtarget.classifyGlobalReference(AM.BaseGV);
// If a reference to this global requires an extra load, we can't fold it.
if (isGlobalStubReference(GVFlags))
return false;
// If BaseGV requires a register for the PIC base, we cannot also have a
// BaseReg specified.
if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
return false;
// If lower 4G is not available, then we must use rip-relative addressing.
if ((M != CodeModel::Small || isPositionIndependent()) &&
Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
switch (AM.Scale) {
case 0:
case 1:
case 2:
case 4:
case 8:
// These scales always work.
break;
case 3:
case 5:
case 9:
// These scales are formed with basereg+scalereg. Only accept if there is
// no basereg yet.
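// E.g. Scale 3 is leaq (%rax,%rax,2), %rax -- the base register slot is
// already consumed by the second copy of the register.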
if (AM.HasBaseReg)
return false;
break;
default: // Other stuff never works.
return false;
}
return true;
}
bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
unsigned Bits = Ty->getScalarSizeInBits();
// XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts.
// Splitting for v32i8/v16i16 on XOP+AVX2 targets is still preferred.
if (Subtarget.hasXOP() &&
(Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64))
return false;
// AVX2 has vpsllv[dq] instructions (and other shifts) that make variable
// shifts just as cheap as scalar ones.
if (Subtarget.hasAVX2() && (Bits == 32 || Bits == 64))
return false;
// AVX512BW has shifts such as vpsllvw.
if (Subtarget.hasBWI() && Bits == 16)
return false;
// Otherwise, it's significantly cheaper to shift by a scalar amount than by a
// fully general vector.
return true;
}
bool X86TargetLowering::isBinOp(unsigned Opcode) const {
switch (Opcode) {
// These are non-commutative binops.
// TODO: Add more X86ISD opcodes once we have test coverage.
case X86ISD::ANDNP:
case X86ISD::PCMPGT:
case X86ISD::FMAX:
case X86ISD::FMIN:
case X86ISD::FANDN:
case X86ISD::VPSHA:
case X86ISD::VPSHL:
case X86ISD::VSHLV:
case X86ISD::VSRLV:
case X86ISD::VSRAV:
return true;
}
return TargetLoweringBase::isBinOp(Opcode);
}
bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
switch (Opcode) {
// TODO: Add more X86ISD opcodes once we have test coverage.
case X86ISD::PCMPEQ:
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ:
case X86ISD::FMAXC:
case X86ISD::FMINC:
case X86ISD::FAND:
case X86ISD::FOR:
case X86ISD::FXOR:
return true;
}
return TargetLoweringBase::isCommutativeBinOp(Opcode);
}
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 > NumBits2;
}
bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
if (!isTypeLegal(EVT::getEVT(Ty1)))
return false;
assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
// Assuming the caller doesn't have a zeroext or signext return parameter,
// truncation all the way down to i1 is valid.
return true;
}
bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<32>(Imm);
}
bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Can also use sub to handle negated immediates.
return isInt<32>(Imm);
}
bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
return isInt<32>(Imm);
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isScalarInteger() || !VT2.isScalarInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 > NumBits2;
}
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit();
}
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit();
}
bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2))
return true;
if (Val.getOpcode() != ISD::LOAD)
return false;
if (!VT1.isSimple() || !VT1.isInteger() ||
!VT2.isSimple() || !VT2.isInteger())
return false;
switch (VT1.getSimpleVT().SimpleTy) {
default: break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
// X86 has 8, 16, and 32-bit zero-extending loads.
return true;
}
return false;
}
bool X86TargetLowering::shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const {
using namespace llvm::PatternMatch;
FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
if (!VTy)
return false;
if (I->getOpcode() == Instruction::Mul &&
VTy->getElementType()->isIntegerTy(64)) {
for (auto &Op : I->operands()) {
// Make sure we are not already sinking this operand
if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
// Look for PMULDQ pattern where the input is a sext_inreg from vXi32 or
// the PMULUDQ pattern where the input is a zext_inreg from vXi32.
if (Subtarget.hasSSE41() &&
match(Op.get(), m_AShr(m_Shl(m_Value(), m_SpecificInt(32)),
m_SpecificInt(32)))) {
Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
Ops.push_back(&Op);
} else if (Subtarget.hasSSE2() &&
match(Op.get(),
m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
Ops.push_back(&Op);
}
}
return !Ops.empty();
}
// A uniform shift amount in a vector shift or funnel shift may be much
// cheaper than a generic variable vector shift, so make that pattern visible
// to SDAG by sinking the shuffle instruction next to the shift.
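// E.g. a splat amount lets isel use the xmm-count forms (psllw/pslld/psllq)
// where a per-element variable shift would need AVX2 (vpsllv*) or XOP.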
int ShiftAmountOpNum = -1;
if (I->isShift())
ShiftAmountOpNum = 1;
else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == Intrinsic::fshl ||
II->getIntrinsicID() == Intrinsic::fshr)
ShiftAmountOpNum = 2;
}
if (ShiftAmountOpNum == -1)
return false;
auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
isVectorShiftByScalarCheap(I->getType())) {
Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
return true;
}
return false;
}
bool X86TargetLowering::shouldConvertPhiType(Type *From, Type *To) const {
if (!Subtarget.is64Bit())
return false;
return TargetLowering::shouldConvertPhiType(From, To);
}
bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
if (isa<MaskedLoadSDNode>(ExtVal.getOperand(0)))
return false;
EVT SrcVT = ExtVal.getOperand(0).getValueType();
// There is no extending load for vXi1.
if (SrcVT.getScalarType() == MVT::i1)
return false;
return true;
}
bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
if (!Subtarget.hasAnyFMA())
return false;
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget.hasFP16();
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
bool X86TargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
EVT VT) const {
// TODO: This is too general. There are cases where pre-AVX512 codegen would
// benefit. The transform may also be profitable for scalar code.
if (!Subtarget.hasAVX512())
return false;
if (!Subtarget.hasVLX() && !VT.is512BitVector())
return false;
if (!VT.isVector() || VT.getScalarType() == MVT::i1)
return false;
return true;
}
/// Targets can use this to indicate that they only support *some*
/// VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {
if (!VT.isSimple())
return false;
// Not for i1 vectors
if (VT.getSimpleVT().getScalarType() == MVT::i1)
return false;
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSimpleVT().getSizeInBits() == 64)
return false;
// We only care that the types being shuffled are legal. The lowering can
// handle any possible shuffle mask that results.
return isTypeLegal(VT.getSimpleVT());
}
bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
EVT VT) const {
// Don't convert an 'and' into a shuffle that we don't directly support.
// vpblendw and vpshufb for 256-bit vectors are not available on AVX1.
if (!Subtarget.hasAVX2())
if (VT == MVT::v32i8 || VT == MVT::v16i16)
return false;
// Just delegate to the generic legality, clear masks aren't special.
return isShuffleMaskLegal(Mask, VT);
}
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
// If the subtarget is using thunks, we need to not generate jump tables.
if (Subtarget.useIndirectThunkBranches())
return false;
// Otherwise, fallback on the generic logic.
return TargetLowering::areJTsAllowed(Fn);
}
MVT X86TargetLowering::getPreferredSwitchConditionType(LLVMContext &Context,
EVT ConditionVT) const {
// Avoid 8 and 16 bit types because they increase the chance for unnecessary
// zero-extensions.
if (ConditionVT.getSizeInBits() < 32)
return MVT::i32;
return TargetLoweringBase::getPreferredSwitchConditionType(Context,
ConditionVT);
}
//===----------------------------------------------------------------------===//
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
// Returns true if EFLAGS is consumed after this iterator in the rest of the
// basic block or any successors of the basic block.
static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
MachineBasicBlock *BB) {
// Scan forward through BB for a use/def of EFLAGS.
for (const MachineInstr &mi : llvm::make_range(std::next(Itr), BB->end())) {
if (mi.readsRegister(X86::EFLAGS))
return true;
// If we found a def, we can stop searching.
if (mi.definesRegister(X86::EFLAGS))
return false;
}
// If we hit the end of the block, check whether EFLAGS is live into a
// successor.
for (MachineBasicBlock *Succ : BB->successors())
if (Succ->isLiveIn(X86::EFLAGS))
return true;
return false;
}
/// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
const DebugLoc &DL = MI.getDebugLoc();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
// For the v = xbegin(), we generate
//
// thisMBB:
// xbegin sinkMBB
//
// mainMBB:
// s0 = -1
//
// fallBB:
// eax = # XABORT_DEF
// s1 = eax
//
// sinkMBB:
// v = phi(s0/mainBB, s1/fallBB)
MachineBasicBlock *thisMBB = MBB;
MachineFunction *MF = MBB->getParent();
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, fallMBB);
MF->insert(I, sinkMBB);
if (isEFLAGSLiveAfter(MI, MBB)) {
mainMBB->addLiveIn(X86::EFLAGS);
fallMBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
MachineRegisterInfo &MRI = MF->getRegInfo();
Register DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
Register mainDstReg = MRI.createVirtualRegister(RC);
Register fallDstReg = MRI.createVirtualRegister(RC);
// thisMBB:
// xbegin fallMBB
// # fallthrough to mainMBB
//  # on abort, branch to fallMBB
BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(fallMBB);
// mainMBB:
// mainDstReg := -1
BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
mainMBB->addSuccessor(sinkMBB);
// fallMBB:
// ; pseudo instruction to model hardware's definition from XABORT
// EAX := XABORT_DEF
// fallDstReg := EAX
BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF));
BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg)
.addReg(X86::EAX);
fallMBB->addSuccessor(sinkMBB);
// sinkMBB:
// DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB)
BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(fallDstReg).addMBB(fallMBB);
MI.eraseFromParent();
return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
// Emit va_arg instruction on X86-64.
// Operands to this pseudo-instruction:
// 0 ) Output : destination address (reg)
// 1-5) Input : va_list address (addr, i64mem)
// 6 ) ArgSize : Size (in bytes) of vararg type
// 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
// 8 ) Align : Alignment of type
// 9 ) EFLAGS (implicit-def)
assert(MI.getNumOperands() == 10 && "VAARG should have 10 operands!");
static_assert(X86::AddrNumOperands == 5, "VAARG assumes 5 address operands");
Register DestReg = MI.getOperand(0).getReg();
MachineOperand &Base = MI.getOperand(1);
MachineOperand &Scale = MI.getOperand(2);
MachineOperand &Index = MI.getOperand(3);
MachineOperand &Disp = MI.getOperand(4);
MachineOperand &Segment = MI.getOperand(5);
unsigned ArgSize = MI.getOperand(6).getImm();
unsigned ArgMode = MI.getOperand(7).getImm();
Align Alignment = Align(MI.getOperand(8).getImm());
MachineFunction *MF = MBB->getParent();
// Memory Reference
assert(MI.hasOneMemOperand() && "Expected VAARG to have one memoperand");
MachineMemOperand *OldMMO = MI.memoperands().front();
// Clone the MMO into two separate MMOs for loading and storing
MachineMemOperand *LoadOnlyMMO = MF->getMachineMemOperand(
OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOStore);
MachineMemOperand *StoreOnlyMMO = MF->getMachineMemOperand(
OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOLoad);
// Machine Information
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass =
getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout()));
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
const DebugLoc &DL = MI.getDebugLoc();
// struct va_list {
// i32 gp_offset
// i32 fp_offset
// i64 overflow_area (address)
// i64 reg_save_area (address)
// }
// sizeof(va_list) = 24
// alignment(va_list) = 8
unsigned TotalNumIntRegs = 6;
unsigned TotalNumXMMRegs = 8;
bool UseGPOffset = (ArgMode == 1);
bool UseFPOffset = (ArgMode == 2);
unsigned MaxOffset = TotalNumIntRegs * 8 +
(UseFPOffset ? TotalNumXMMRegs * 16 : 0);
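// SysV x86-64: the reg_save_area holds the 6 GP registers (48 bytes)
// followed by the 8 XMM registers (16 bytes each), so gp_offset tops out
// at 48 and fp_offset at 176.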
// Align ArgSize to a multiple of 8.
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
bool NeedsAlign = (Alignment > 8);
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *overflowMBB;
MachineBasicBlock *offsetMBB;
MachineBasicBlock *endMBB;
unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
unsigned OffsetReg = 0;
if (!UseGPOffset && !UseFPOffset) {
// If we only pull from the overflow region, we don't create a branch;
// control flow is unchanged.
OffsetDestReg = 0; // unused
OverflowDestReg = DestReg;
offsetMBB = nullptr;
overflowMBB = thisMBB;
endMBB = thisMBB;
} else {
// First emit code to check if gp_offset (or fp_offset) is below the bound.
// If so, pull the argument from reg_save_area. (branch to offsetMBB)
// If not, pull from overflow_area. (branch to overflowMBB)
//
// thisMBB
// | .
// | .
// offsetMBB overflowMBB
// | .
// | .
// endMBB
// Registers for the PHI in endMBB
OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator MBBIter = ++MBB->getIterator();
// Insert the new basic blocks
MF->insert(MBBIter, offsetMBB);
MF->insert(MBBIter, overflowMBB);
MF->insert(MBBIter, endMBB);
// Transfer the remainder of MBB and its successor edges to endMBB.
endMBB->splice(endMBB->begin(), thisMBB,
std::next(MachineBasicBlock::iterator(MI)), thisMBB->end());
endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Make offsetMBB and overflowMBB successors of thisMBB
thisMBB->addSuccessor(offsetMBB);
thisMBB->addSuccessor(overflowMBB);
// endMBB is a successor of both offsetMBB and overflowMBB
offsetMBB->addSuccessor(endMBB);
overflowMBB->addSuccessor(endMBB);
// Load the offset value into a register
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.setMemRefs(LoadOnlyMMO);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
.addReg(OffsetReg)
.addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
BuildMI(thisMBB, DL, TII->get(X86::JCC_1))
.addMBB(overflowMBB).addImm(X86::COND_AE);
}
// In offsetMBB, emit code to use the reg_save_area.
if (offsetMBB) {
assert(OffsetReg != 0);
// Read the reg_save_area address.
Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(
offsetMBB, DL,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
RegSaveReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, Subtarget.isTarget64BitLP64() ? 16 : 12)
.add(Segment)
.setMemRefs(LoadOnlyMMO);
if (Subtarget.isTarget64BitLP64()) {
// Zero-extend the offset
Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
// Add the offset to the reg_save_area to get the final address.
BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
.addReg(OffsetReg64)
.addReg(RegSaveReg);
} else {
// Add the offset to the reg_save_area to get the final address.
BuildMI(offsetMBB, DL, TII->get(X86::ADD32rr), OffsetDestReg)
.addReg(OffsetReg)
.addReg(RegSaveReg);
}
// Compute the offset for the next argument
Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.addReg(NextOffsetReg)
.setMemRefs(StoreOnlyMMO);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
.addMBB(endMBB);
}
//
// Emit code to use overflow area
//
// Load the overflow_area address into a register.
Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
OverflowAddrReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
.setMemRefs(LoadOnlyMMO);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
if (NeedsAlign) {
// Align the overflow address
Register TmpReg = MRI.createVirtualRegister(AddrRegClass);
// aligned_addr = (addr + (align-1)) & ~(align-1)
BuildMI(
overflowMBB, DL,
TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
TmpReg)
.addReg(OverflowAddrReg)
.addImm(Alignment.value() - 1);
BuildMI(
overflowMBB, DL,
TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri),
OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Alignment.value() - 1));
} else {
BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
}
// Compute the next overflow address after this argument.
// (the overflow address should be kept 8-byte aligned)
Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(
overflowMBB, DL,
TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
// Store the new overflow address.
BuildMI(overflowMBB, DL,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr))
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
.addReg(NextAddrReg)
.setMemRefs(StoreOnlyMMO);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
BuildMI(*endMBB, endMBB->begin(), DL,
TII->get(X86::PHI), DestReg)
.addReg(OffsetDestReg).addMBB(offsetMBB)
.addReg(OverflowDestReg).addMBB(overflowMBB);
}
// Erase the pseudo instruction
MI.eraseFromParent();
return endMBB;
}
// The EFLAGS operand of SelectItr might be missing a kill marker
// because there were multiple uses of EFLAGS, and ISel didn't know
// which to mark. Figure out whether SelectItr should have had a
// kill marker, and set it if it should. Returns true if the kill
// marker was added.
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock* BB,
const TargetRegisterInfo* TRI) {
if (isEFLAGSLiveAfter(SelectItr, BB))
return false;
// We found a def, or hit the end of the basic block and EFLAGS wasn't live
// out. SelectMI should have a kill flag on EFLAGS.
SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
return true;
}
// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
// together with other CMOV pseudo-opcodes into a single basic block with
// a conditional jump around it.
static bool isCMOVPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
case X86::CMOV_FR16:
case X86::CMOV_FR16X:
case X86::CMOV_FR32:
case X86::CMOV_FR32X:
case X86::CMOV_FR64:
case X86::CMOV_FR64X:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
case X86::CMOV_VR64:
case X86::CMOV_VR128:
case X86::CMOV_VR128X:
case X86::CMOV_VR256:
case X86::CMOV_VR256X:
case X86::CMOV_VR512:
case X86::CMOV_VK1:
case X86::CMOV_VK2:
case X86::CMOV_VK4:
case X86::CMOV_VK8:
case X86::CMOV_VK16:
case X86::CMOV_VK32:
case X86::CMOV_VK64:
return true;
default:
return false;
}
}
// Helper function, which inserts PHI functions into SinkMBB:
// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
// where %FalseValue(i) and %TrueValue(i) are taken from the consecutive CMOVs
// in the [MIItBegin, MIItEnd) range. It returns the MachineInstrBuilder for
// the last PHI inserted.
static MachineInstrBuilder createPHIsForCMOVsInSinkBB(
MachineBasicBlock::iterator MIItBegin, MachineBasicBlock::iterator MIItEnd,
MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
MachineBasicBlock *SinkMBB) {
MachineFunction *MF = TrueMBB->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const DebugLoc &DL = MIItBegin->getDebugLoc();
X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm());
X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
// As we are creating the PHIs, we have to be careful if there is more than
// one. Later CMOVs may reference the results of earlier CMOVs, but later
// PHIs have to reference the individual true/false inputs from earlier PHIs.
// That also means that PHI construction must work forward from earlier to
// later, and that the code must maintain a mapping from each earlier PHI's
// destination register to the registers that went into that PHI.
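// For example, given
//   t2 = CMOV cond t1, f1
//   t3 = CMOV cond t2, f2
// the table maps t2 -> (t1, f1) after the first PHI is built, so the second
// PHI reads t1/f1 on the corresponding edges instead of t2, which is not
// available on those edges.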
DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
MachineInstrBuilder MIB;
for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
Register DestReg = MIIt->getOperand(0).getReg();
Register Op1Reg = MIIt->getOperand(1).getReg();
Register Op2Reg = MIIt->getOperand(2).getReg();
// If this CMOV we are generating is the opposite condition from
// the jump we generated, then we have to swap the operands for the
// PHI that is going to be generated.
if (MIIt->getOperand(3).getImm() == OppCC)
std::swap(Op1Reg, Op2Reg);
if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
Op1Reg = RegRewriteTable[Op1Reg].first;
if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
Op2Reg = RegRewriteTable[Op2Reg].second;
MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
.addReg(Op1Reg)
.addMBB(FalseMBB)
.addReg(Op2Reg)
.addMBB(TrueMBB);
// Add this PHI to the rewrite table.
RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
}
return MIB;
}
// Lower cascaded selects of the form
// (SecondCascadedCMOV (FirstCMOV F, T, cc1), T, cc2).
MachineBasicBlock *
X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
MachineInstr &SecondCascadedCMOV,
MachineBasicBlock *ThisMBB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const DebugLoc &DL = FirstCMOV.getDebugLoc();
// We lower cascaded CMOVs such as
//
// (SecondCascadedCMOV (FirstCMOV F, T, cc1), T, cc2)
//
// to two successive branches.
//
// Without this, we would add a PHI between the two jumps, which ends up
// creating a few copies all around. For instance, for
//
// (sitofp (zext (fcmp une)))
//
// we would generate:
//
// ucomiss %xmm1, %xmm0
// movss <1.0f>, %xmm0
// movaps %xmm0, %xmm1
// jne .LBB5_2
// xorps %xmm1, %xmm1
// .LBB5_2:
// jp .LBB5_4
// movaps %xmm1, %xmm0
// .LBB5_4:
// retq
//
// because this custom-inserter would have generated:
//
// A
// | \
// | B
// | /
// C
// | \
// | D
// | /
// E
//
// A: X = ...; Y = ...
// B: empty
// C: Z = PHI [X, A], [Y, B]
// D: empty
// E: PHI [X, C], [Z, D]
//
// If we lower both CMOVs in a single step, we can instead generate:
//
// A
// | \
// | C
// | /|
// |/ |
// | |
// | D
// | /
// E
//
// A: X = ...; Y = ...
// D: empty
// E: PHI [X, A], [X, C], [Y, D]
//
// Which, in our sitofp/fcmp example, gives us something like:
//
// ucomiss %xmm1, %xmm0
// movss <1.0f>, %xmm0
// jne .LBB5_4
// jp .LBB5_4
// xorps %xmm0, %xmm0
// .LBB5_4:
// retq
//
// We lower cascaded CMOV into two successive branches to the same block.
// EFLAGS is used by both, so mark it as live in the second.
const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
MachineFunction *F = ThisMBB->getParent();
MachineBasicBlock *FirstInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SecondInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = ++ThisMBB->getIterator();
F->insert(It, FirstInsertedMBB);
F->insert(It, SecondInsertedMBB);
F->insert(It, SinkMBB);
// For a cascaded CMOV, we lower it to two successive branches to
// the same block (SinkMBB). EFLAGS is used by both, so mark it as live in
// the FirstInsertedMBB.
FirstInsertedMBB->addLiveIn(X86::EFLAGS);
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
if (!SecondCascadedCMOV.killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(SecondCascadedCMOV, ThisMBB, TRI)) {
SecondInsertedMBB->addLiveIn(X86::EFLAGS);
SinkMBB->addLiveIn(X86::EFLAGS);
}
// Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
SinkMBB->splice(SinkMBB->begin(), ThisMBB,
std::next(MachineBasicBlock::iterator(FirstCMOV)),
ThisMBB->end());
SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
// Fallthrough block for ThisMBB.
ThisMBB->addSuccessor(FirstInsertedMBB);
// The true block target of the first branch is always SinkMBB.
ThisMBB->addSuccessor(SinkMBB);
// Fallthrough block for FirstInsertedMBB.
FirstInsertedMBB->addSuccessor(SecondInsertedMBB);
// The true block for the branch of FirstInsertedMBB.
FirstInsertedMBB->addSuccessor(SinkMBB);
// This is fallthrough.
SecondInsertedMBB->addSuccessor(SinkMBB);
// Create the conditional branch instructions.
X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm());
BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);
X86::CondCode SecondCC =
X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm());
BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC);
// SinkMBB:
// %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ]
Register DestReg = SecondCascadedCMOV.getOperand(0).getReg();
Register Op1Reg = FirstCMOV.getOperand(1).getReg();
Register Op2Reg = FirstCMOV.getOperand(2).getReg();
MachineInstrBuilder MIB =
BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg)
.addReg(Op1Reg)
.addMBB(SecondInsertedMBB)
.addReg(Op2Reg)
.addMBB(ThisMBB);
// The edge from FirstInsertedMBB carries the same incoming value as the
// edge from ThisMBB (the True operand of the SELECT_CC/CMOV nodes).
MIB.addReg(FirstCMOV.getOperand(2).getReg()).addMBB(FirstInsertedMBB);
// Now remove the CMOVs.
FirstCMOV.eraseFromParent();
SecondCascadedCMOV.eraseFromParent();
return SinkMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
MachineBasicBlock *ThisMBB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between and a branch opcode to use.
// ThisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> FalseMBB
// This code lowers all pseudo-CMOV instructions. Generally it lowers these
// as described above, by inserting a BB, and then making a PHI at the join
// point to select the true and false operands of the CMOV in the PHI.
//
// The code also handles two different cases of multiple CMOV opcodes
// in a row.
//
// Case 1:
// In this case, there are multiple CMOVs in a row, all of which are based on
// the same condition setting (or the exact opposite condition setting).
// In this case we can lower all the CMOVs using a single inserted BB, and
// then make a number of PHIs at the join point to model the CMOVs. The only
// trickiness here, is that in a case like:
//
// t2 = CMOV cond1 t1, f1
// t3 = CMOV cond1 t2, f2
//
// when rewriting this into PHIs, we have to perform some renaming on the
// temps since you cannot have a PHI operand refer to a PHI result earlier
// in the same block. The "simple" but wrong lowering would be:
//
// t2 = PHI t1(BB1), f1(BB2)
// t3 = PHI t2(BB1), f2(BB2)
//
// but clearly t2 is not defined in BB1, so that is incorrect. The proper
// renaming is to note that on the path through BB1, t2 is really just a
// copy of t1, and do that renaming, properly generating:
//
// t2 = PHI t1(BB1), f1(BB2)
// t3 = PHI t1(BB1), f2(BB2)
//
// Case 2:
// CMOV ((CMOV F, T, cc1), T, cc2) is checked here and handled by a separate
// function - EmitLoweredCascadedSelect.
X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm());
X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
MachineInstr *LastCMOV = &MI;
MachineBasicBlock::iterator NextMIIt = MachineBasicBlock::iterator(MI);
// First, check for case 1, where there are multiple CMOVs with the same
// condition. Of the two cases of multiple CMOV lowerings, case 1 reduces
// the number of jumps the most.
if (isCMOVPseudo(MI)) {
// See if we have a string of CMOVs with the same condition. Skip over
// intervening debug insts.
while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) &&
(NextMIIt->getOperand(3).getImm() == CC ||
NextMIIt->getOperand(3).getImm() == OppCC)) {
LastCMOV = &*NextMIIt;
NextMIIt = next_nodbg(NextMIIt, ThisMBB->end());
}
}
// This checks for case 2, but we only do so if we didn't already find
// case 1, as indicated by LastCMOV == &MI.
if (LastCMOV == &MI && NextMIIt != ThisMBB->end() &&
NextMIIt->getOpcode() == MI.getOpcode() &&
NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() &&
NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() &&
NextMIIt->getOperand(1).isKill()) {
return EmitLoweredCascadedSelect(MI, *NextMIIt, ThisMBB);
}
const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
MachineFunction *F = ThisMBB->getParent();
MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = ++ThisMBB->getIterator();
F->insert(It, FalseMBB);
F->insert(It, SinkMBB);
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
if (!LastCMOV->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(LastCMOV, ThisMBB, TRI)) {
FalseMBB->addLiveIn(X86::EFLAGS);
SinkMBB->addLiveIn(X86::EFLAGS);
}
// Transfer any debug instructions inside the CMOV sequence to the sunk block.
auto DbgRange = llvm::make_range(MachineBasicBlock::iterator(MI),
MachineBasicBlock::iterator(LastCMOV));
for (MachineInstr &MI : llvm::make_early_inc_range(DbgRange))
if (MI.isDebugInstr())
SinkMBB->push_back(MI.removeFromParent());
// Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
SinkMBB->splice(SinkMBB->end(), ThisMBB,
std::next(MachineBasicBlock::iterator(LastCMOV)),
ThisMBB->end());
SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
// Fallthrough block for ThisMBB.
ThisMBB->addSuccessor(FalseMBB);
// The true block target of the first (or only) branch is always SinkMBB.
ThisMBB->addSuccessor(SinkMBB);
// Fallthrough block for FalseMBB.
FalseMBB->addSuccessor(SinkMBB);
// Create the conditional branch instruction.
BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
// SinkMBB:
// %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ]
// ...
MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
MachineBasicBlock::iterator MIItEnd =
std::next(MachineBasicBlock::iterator(LastCMOV));
createPHIsForCMOVsInSinkBB(MIItBegin, MIItEnd, ThisMBB, FalseMBB, SinkMBB);
// Now remove the CMOV(s).
ThisMBB->erase(MIItBegin, MIItEnd);
return SinkMBB;
}
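// Pick the SUB-with-immediate opcode for the given pointer width and
// immediate; e.g. on LP64 an immediate of 16 fits in a signed byte and
// selects X86::SUB64ri8.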
static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
if (IsLP64) {
if (isInt<8>(Imm))
return X86::SUB64ri8;
return X86::SUB64ri32;
} else {
if (isInt<8>(Imm))
return X86::SUB32ri8;
return X86::SUB32ri;
}
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
MachineBasicBlock *MBB) const {
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
const DebugLoc &DL = MI.getDebugLoc();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
const unsigned ProbeSize = getStackProbeSize(*MF);
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator MBBIter = ++MBB->getIterator();
MF->insert(MBBIter, testMBB);
MF->insert(MBBIter, blockMBB);
MF->insert(MBBIter, tailMBB);
Register sizeVReg = MI.getOperand(1).getReg();
Register physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;
Register TmpStackPtr = MRI.createVirtualRegister(
TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
Register FinalStackPtr = MRI.createVirtualRegister(
TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr)
.addReg(physSPReg);
{
const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr;
BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr)
.addReg(TmpStackPtr)
.addReg(sizeVReg);
}
// test rsp size
BuildMI(testMBB, DL,
TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
.addReg(FinalStackPtr)
.addReg(physSPReg);
BuildMI(testMBB, DL, TII->get(X86::JCC_1))
.addMBB(tailMBB)
.addImm(X86::COND_GE);
testMBB->addSuccessor(blockMBB);
testMBB->addSuccessor(tailMBB);
// Touch the block then extend it. This is the opposite order from a static
// probe, where we allocate then touch; doing it this way avoids having to
// probe the tail of the static alloca. Possible scenarios are:
//
// + ---- <- ------------ <- ------------- <- ------------ +
// | |
// [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] + -> [dyn probe] -> [page alloc] -> [dyn probe] -> [tail alloc] +
// | |
// + <- ----------- <- ------------ <- ----------- <- ------------ +
//
// The property we want to enforce is to never have more than [page alloc]
// between two probes.
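// In rough pseudo-assembly, the loop emitted below (64-bit case) is:
//   testMBB:  cmp %rsp, FinalStackPtr ; jge tailMBB
//   blockMBB: xorq $0, (%rsp)         # touch the current page
//             subq $ProbeSize, %rsp   # extend by one probe interval
//             jmp testMBB
//   tailMBB:  result = FinalStackPtr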
const unsigned XORMIOpc =
TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
.addImm(0);
BuildMI(blockMBB, DL,
TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg)
.addReg(physSPReg)
.addImm(ProbeSize);
BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
blockMBB->addSuccessor(testMBB);
// Replace the original instruction's result with the expected stack pointer.
BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
.addReg(FinalStackPtr);
tailMBB->splice(tailMBB->end(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
tailMBB->transferSuccessorsAndUpdatePHIs(MBB);
MBB->addSuccessor(testMBB);
// Delete the original pseudo instruction.
MI.eraseFromParent();
// And we're done.
return tailMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
const bool Is64Bit = Subtarget.is64Bit();
const bool IsLP64 = Subtarget.isTarget64BitLP64();
const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
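// TlsOffset selects the per-thread stack-limit slot used by the split-stack
// ABI: %fs:0x70 on LP64, %fs:0x40 on x32, and %gs:0x30 on 32-bit targets.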
// BB:
// ... [Till the alloca]
// If stacklet is not large enough, jump to mallocMBB
//
// bumpMBB:
// Allocate by subtracting from RSP
// Jump to continueMBB
//
// mallocMBB:
// Allocate by call to runtime
//
// continueMBB:
// ...
// [rest of original BB]
//
MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
getRegClassFor(getPointerTy(MF->getDataLayout()));
Register mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI.getOperand(1).getReg(),
physSPReg =
IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;
MachineFunction::iterator MBBIter = ++BB->getIterator();
MF->insert(MBBIter, bumpMBB);
MF->insert(MBBIter, mallocMBB);
MF->insert(MBBIter, continueMBB);
continueMBB->splice(continueMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
continueMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB; otherwise fall through to bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// mallocMBB calls into a routine in libgcc to allocate more space from the
// heap.
const uint32_t *RegMask =
Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
} else if (Is64Bit) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EDI, RegState::Implicit)
.addReg(X86::EAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
.addImm(16);
BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
.addReg(IsLP64 ? X86::RAX : X86::EAX);
BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Set up the CFG correctly.
BB->addSuccessor(bumpMBB);
BB->addSuccessor(mallocMBB);
mallocMBB->addSuccessor(continueMBB);
bumpMBB->addSuccessor(continueMBB);
// Take care of the PHI nodes.
BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
MI.getOperand(0).getReg())
.addReg(mallocPtrVReg)
.addMBB(mallocMBB)
.addReg(bumpSPPtrVReg)
.addMBB(bumpMBB);
// Delete the original pseudo instruction.
MI.eraseFromParent();
// And we're done.
return continueMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
const DebugLoc &DL = MI.getDebugLoc();
assert(!isAsynchronousEHPersonality(
classifyEHPersonality(MF->getFunction().getPersonalityFn())) &&
"SEH does not use catchret!");
// Only 32-bit EH needs to worry about manually restoring stack pointers.
if (!Subtarget.is32Bit())
return BB;
// C++ EH creates a new target block to hold the restore code, and wires up
// the new block to the return destination with a normal JMP_4.
MachineBasicBlock *RestoreMBB =
MF->CreateMachineBasicBlock(BB->getBasicBlock());
assert(BB->succ_size() == 1);
MF->insert(std::next(BB->getIterator()), RestoreMBB);
RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(RestoreMBB);
MI.getOperand(0).setMBB(RestoreMBB);
// Marking this as an EH pad but not a funclet entry block causes PEI to
// restore stack pointers in the block.
RestoreMBB->setIsEHPad(true);
auto RestoreMBBI = RestoreMBB->begin();
BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const {
// So, here we replace TLSADDR with the sequence:
// adjust_stackdown -> TLSADDR -> adjust_stackup.
// We need this because TLSADDR is lowered into a call inside MC; without
// the two markers, shrink-wrapping may push the prologue/epilogue past
// them.
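// The resulting sequence around the pseudo is:
//   ADJCALLSTACKDOWN 0, 0, 0
//   TLSADDR ...
//   ADJCALLSTACKUP 0, 0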
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
MachineFunction &MF = *BB->getParent();
// Emit CALLSEQ_START right before the instruction.
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
MachineInstrBuilder CallseqStart =
BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
// Emit CALLSEQ_END right after the instruction.
// We don't call erase from parent because we want to keep the
// original instruction around.
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
MachineInstrBuilder CallseqEnd =
BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const {
// This is pretty easy. We take the value we received from our load of the
// relocation, stick it in either RDI (x86-64) or EAX, and do an indirect
// call. The return value will then be in the normal return register.
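// On x86-64 Darwin the emitted sequence is roughly:
//   movq _var@TLVP(%rip), %rdi
//   callq *(%rdi)          # result returned in %rax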
MachineFunction *F = BB->getParent();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
assert(MI.getOperand(3).isGlobal() && "This should be a global");
// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
Subtarget.is64Bit() ?
Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (!isPositionIndependent()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) {
switch (RPOpc) {
case X86::INDIRECT_THUNK_CALL32:
return X86::CALLpcrel32;
case X86::INDIRECT_THUNK_CALL64:
return X86::CALL64pcrel32;
case X86::INDIRECT_THUNK_TCRETURN32:
return X86::TCRETURNdi;
case X86::INDIRECT_THUNK_TCRETURN64:
return X86::TCRETURNdi64;
}
llvm_unreachable("not indirect thunk opcode");
}
static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget,
unsigned Reg) {
if (Subtarget.useRetpolineExternalThunk()) {
// When using an external thunk for retpolines, we pick names that match the
// names GCC happens to use as well. This helps simplify the implementation
// of the thunks for kernels where they have no easy ability to create
// aliases and are doing non-trivial configuration of the thunk's body. For
// example, the Linux kernel will do boot-time hot patching of the thunk
// bodies and cannot easily export aliases of these to loaded modules.
//
// Note that at any point in the future, we may need to change the semantics
// of how we implement retpolines and at that time will likely change the
// name of the called thunk. Essentially, there is no hard guarantee that
// LLVM will generate calls to specific thunks, we merely make a best-effort
// attempt to help out kernels and other systems where duplicating the
// thunks is costly.
switch (Reg) {
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_eax";
case X86::ECX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_ecx";
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_edx";
case X86::EDI:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
}
llvm_unreachable("unexpected reg for external indirect thunk");
}
if (Subtarget.useRetpolineIndirectCalls() ||
Subtarget.useRetpolineIndirectBranches()) {
// When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_eax";
case X86::ECX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_ecx";
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edx";
case X86::EDI:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_retpoline_r11";
}
llvm_unreachable("unexpected reg for retpoline");
}
if (Subtarget.useLVIControlFlowIntegrity()) {
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_lvi_thunk_r11";
}
llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Copy the virtual register into a scratch physical register (R11 on
// 64-bit) and call the retpoline thunk.
const DebugLoc &DL = MI.getDebugLoc();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
// Find an available scratch register to hold the callee. On 64-bit, we can
// just use R11, but we scan for uses anyway to ensure we don't generate
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
// already a register use operand to the call to hold the callee. If none
// are available, use EDI instead. EDI is chosen because EBX is the PIC base
// register and ESI is the base pointer to realigned stack frames with VLAs.
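// For example, a 32-bit call that already uses EAX and ECX as register
// operands gets EDX here; if EDX is taken as well, EDI is used.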
SmallVector<unsigned, 3> AvailableRegs;
if (Subtarget.is64Bit())
AvailableRegs.push_back(X86::R11);
else
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
// Zero out any registers that are already used.
for (const auto &MO : MI.operands()) {
if (MO.isReg() && MO.isUse())
for (unsigned &Reg : AvailableRegs)
if (Reg == MO.getReg())
Reg = 0;
}
// Choose the first remaining non-zero available register.
unsigned AvailableReg = 0;
for (unsigned MaybeReg : AvailableRegs) {
if (MaybeReg) {
AvailableReg = MaybeReg;
break;
}
}
if (!AvailableReg)
report_fatal_error("calling convention incompatible with retpoline, no "
"available registers");
const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
MI.getOperand(0).ChangeToES(Symbol);
MI.setDesc(TII->get(Opc));
MachineInstrBuilder(*BB->getParent(), &MI)
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
return BB;
}
/// SetJmp implies a future control-flow change upon a call to the
/// corresponding LongJmp.
/// Instead of using the 'return' instruction, the long jump fixes the stack and
/// performs an indirect branch. To do so it uses the registers that were stored
/// in the jump buffer (when calling SetJmp).
/// In case the shadow stack is enabled we need to fix it as well, because some
/// return addresses will be skipped.
/// The function will save the SSP for future fixing in the function
/// emitLongJmpShadowStackFix.
/// \sa emitLongJmpShadowStackFix
/// \param [in] MI The temporary Machine Instruction for the builtin.
/// \param [in] MBB The Machine Basic Block that will be modified.
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineInstrBuilder MIB;
// Memory Reference.
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
MI.memoperands_end());
// Initialize a register with zero.
MVT PVT = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
Register ZReg = MRI.createVirtualRegister(PtrRC);
unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
.addDef(ZReg)
.addReg(ZReg, RegState::Undef)
.addReg(ZReg, RegState::Undef);
// Read the current SSP register value into the zeroed register.
Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
// Write the SSP register value to slot 3 of the input memory buffer.
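// (As used by this lowering, the buffer layout is: slot 0 = frame pointer,
// slot 1 = resume address, slot 2 = stack pointer, slot 3 = shadow stack
// pointer; each slot is PVT.getStoreSize() bytes wide.)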
unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc));
const int64_t SSPOffset = 3 * PVT.getStoreSize();
const unsigned MemOpndSlot = 1;
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset);
else
MIB.add(MI.getOperand(MemOpndSlot + i));
}
MIB.addReg(SSPCopyReg);
MIB.setMemRefs(MMOs);
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
MI.memoperands_end());
unsigned DstReg;
unsigned MemOpndSlot = 0;
unsigned CurOp = 0;
DstReg = MI.getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
(void)TRI;
Register mainDstReg = MRI.createVirtualRegister(RC);
Register restoreDstReg = MRI.createVirtualRegister(RC);
MemOpndSlot = CurOp;
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
// For v = setjmp(buf), we generate
//
// thisMBB:
// buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB
// SjLjSetup restoreMBB
//
// mainMBB:
// v_main = 0
//
// sinkMBB:
// v = phi(main, restore)
//
// restoreMBB:
// if base pointer being used, load it from frame
// v_restore = 1
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MF->push_back(restoreMBB);
restoreMBB->setMachineBlockAddressTaken();
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
unsigned PtrStoreOpc = 0;
unsigned LabelReg = 0;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
!isPositionIndependent();
// Prepare IP either in reg or imm.
if (!UseImmLabel) {
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget.is64Bit()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addMBB(restoreMBB)
.addReg(0);
} else {
const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
.addReg(XII->getGlobalBaseReg(MF))
.addImm(0)
.addReg(0)
.addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
.addReg(0);
}
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Store IP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
else
MIB.add(MI.getOperand(MemOpndSlot + i));
}
if (!UseImmLabel)
MIB.addReg(LabelReg);
else
MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOs);
if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
emitSetJmpShadowStackFix(MI, thisMBB);
}
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
// mainMBB:
// EAX = 0
BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(restoreDstReg).addMBB(restoreMBB);
// restoreMBB:
if (RegInfo->hasBasePointer(*MF)) {
const bool Uses64BitFramePtr =
Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
X86FI->setRestoreBasePointer(MF);
Register FramePtr = RegInfo->getFrameRegister(*MF);
Register BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
.setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
restoreMBB->addSuccessor(sinkMBB);
MI.eraseFromParent();
return sinkMBB;
}
/// Fix the shadow stack using the previously saved SSP pointer.
/// \sa emitSetJmpShadowStackFix
/// \param [in] MI The temporary Machine Instruction for the builtin.
/// \param [in] MBB The Machine Basic Block that will be modified.
/// \return The sink MBB that will perform the future indirect branch.
MachineBasicBlock *
X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
MI.memoperands_end());
MVT PVT = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
// checkSspMBB:
// xor vreg1, vreg1
// rdssp vreg1
// test vreg1, vreg1
// je sinkMBB # Jump if Shadow Stack is not supported
// fallMBB:
// mov buf+24/12(%rip), vreg2
// sub vreg1, vreg2
// jbe sinkMBB # No need to fix the Shadow Stack
// fixShadowMBB:
// shr 3/2, vreg2
// incssp vreg2 # fix the SSP according to the lower 8 bits
// shr 8, vreg2
// je sinkMBB
// fixShadowLoopPrepareMBB:
// shl vreg2
// mov 128, vreg3
// fixShadowLoopMBB:
// incssp vreg3
// dec vreg2
// jne fixShadowLoopMBB # Iterate until you finish fixing
// # the Shadow Stack
// sinkMBB:
MachineFunction::iterator I = ++MBB->getIterator();
const BasicBlock *BB = MBB->getBasicBlock();
MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, checkSspMBB);
MF->insert(I, fallMBB);
MF->insert(I, fixShadowMBB);
MF->insert(I, fixShadowLoopPrepareMBB);
MF->insert(I, fixShadowLoopMBB);
MF->insert(I, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI),
MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
MBB->addSuccessor(checkSspMBB);
// Initialize a register with zero.
Register ZReg = MRI.createVirtualRegister(&X86::GR32RegClass);
BuildMI(checkSspMBB, DL, TII->get(X86::MOV32r0), ZReg);
if (PVT == MVT::i64) {
Register TmpZReg = MRI.createVirtualRegister(PtrRC);
BuildMI(checkSspMBB, DL, TII->get(X86::SUBREG_TO_REG), TmpZReg)
.addImm(0)
.addReg(ZReg)
.addImm(X86::sub_32bit);
ZReg = TmpZReg;
}
// Read the current SSP register value into the zeroed register.
Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
// Check whether the SSP register is zero (shadow stacks not supported) and,
// if so, jump directly to the sink.
unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
.addReg(SSPCopyReg)
.addReg(SSPCopyReg);
BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
checkSspMBB->addSuccessor(sinkMBB);
checkSspMBB->addSuccessor(fallMBB);
// Reload the previously saved SSP register value.
Register PrevSSPReg = MRI.createVirtualRegister(PtrRC);
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
const int64_t SPPOffset = 3 * PVT.getStoreSize();
MachineInstrBuilder MIB =
BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
MIB.addDisp(MO, SPPOffset);
else if (MO.isReg()) // Don't add the whole operand, we don't want to
// preserve kill flags.
MIB.addReg(MO.getReg());
else
MIB.add(MO);
}
MIB.setMemRefs(MMOs);
// Subtract the current SSP from the previous SSP.
Register SspSubReg = MRI.createVirtualRegister(PtrRC);
unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg)
.addReg(PrevSSPReg)
.addReg(SSPCopyReg);
// Jump to sink in case PrevSSPReg <= SSPCopyReg.
BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE);
fallMBB->addSuccessor(sinkMBB);
fallMBB->addSuccessor(fixShadowMBB);
// Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8.
unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg)
.addReg(SspSubReg)
.addImm(Offset);
// Increase the SSP, looking only at the lower 8 bits of the delta.
unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
// Reset the lower 8 bits.
Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg)
.addReg(SspFirstShrReg)
.addImm(8);
// Jump if the result of the shift is zero.
BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
fixShadowMBB->addSuccessor(sinkMBB);
fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);
// Do a single shift left.
unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1;
Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg)
.addReg(SspSecondShrReg);
// Save the value 128 to a register (will be used next with incssp).
Register Value128InReg = MRI.createVirtualRegister(PtrRC);
unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg)
.addImm(128);
fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);
// Since incssp only looks at the lower 8 bits, we might need to do several
// iterations of incssp until we finish fixing the shadow stack.
Register DecReg = MRI.createVirtualRegister(PtrRC);
Register CounterReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg)
.addReg(SspAfterShlReg)
.addMBB(fixShadowLoopPrepareMBB)
.addReg(DecReg)
.addMBB(fixShadowLoopMBB);
// Every iteration we increase the SSP by 128.
BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg);
// Every iteration we decrement the counter by 1.
unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);
// Jump if the counter is not zero yet.
BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE);
fixShadowLoopMBB->addSuccessor(sinkMBB);
fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);
return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
MI.memoperands_end());
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
Register Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
Register FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
Register SP = RegInfo->getStackRegister();
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
MachineBasicBlock *thisMBB = MBB;
// When CET and shadow stacks are enabled, we need to fix the Shadow Stack.
if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
thisMBB = emitLongJmpShadowStackFix(MI, thisMBB);
}
// Reload FP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg()) // Don't add the whole operand, we don't want to
// preserve kill flags.
MIB.addReg(MO.getReg());
else
MIB.add(MO);
}
MIB.setMemRefs(MMOs);
// Reload IP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
MIB.addDisp(MO, LabelOffset);
else if (MO.isReg()) // Don't add the whole operand, we don't want to
// preserve kill flags.
MIB.addReg(MO.getReg());
else
MIB.add(MO);
}
MIB.setMemRefs(MMOs);
// Reload SP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
else
MIB.add(MI.getOperand(i)); // We can preserve the kill flags here, it's
// the last instruction of the expansion.
}
MIB.setMemRefs(MMOs);
// Jump
BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
MI.eraseFromParent();
return thisMBB;
}
void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
unsigned Op = 0;
unsigned VR = 0;
bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
!isPositionIndependent();
if (UseImmLabel) {
Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
} else {
const TargetRegisterClass *TRC =
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
VR = MRI->createVirtualRegister(TRC);
Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
if (Subtarget.is64Bit())
BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
.addReg(X86::RIP)
.addImm(1)
.addReg(0)
.addMBB(DispatchBB)
.addReg(0);
else
BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
.addReg(0) /* TII->getGlobalBaseReg(MF) */
.addImm(1)
.addReg(0)
.addMBB(DispatchBB, Subtarget.classifyBlockAddressReference())
.addReg(0);
}
MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op));
addFrameReference(MIB, FI, Subtarget.is64Bit() ? 56 : 36);
if (UseImmLabel)
MIB.addMBB(DispatchBB);
else
MIB.addReg(VR);
}
MachineBasicBlock *
X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
int FI = MF->getFrameInfo().getFunctionContextIndex();
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
for (auto &MBB : *MF) {
if (!MBB.isEHPad())
continue;
MCSymbol *Sym = nullptr;
for (const auto &MI : MBB) {
if (MI.isDebugInstr())
continue;
assert(MI.isEHLabel() && "expected EH_LABEL");
Sym = MI.getOperand(0).getMCSymbol();
break;
}
if (!MF->hasCallSiteLandingPad(Sym))
continue;
for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
CallSiteNumToLPad[CSI].push_back(&MBB);
MaxCSNum = std::max(MaxCSNum, CSI);
}
}
// Get an ordered list of the machine basic blocks for the jump table.
std::vector<MachineBasicBlock *> LPadList;
SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
for (auto &LP : CallSiteNumToLPad[CSI]) {
LPadList.push_back(LP);
InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
}
}
assert(!LPadList.empty() &&
"No landing pad destinations for the dispatch jump table!");
// Create the MBBs for the dispatch code.
// Shove the dispatch's address into the return slot in the function context.
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
DispatchBB->setIsEHPad(true);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
BuildMI(TrapBB, DL, TII->get(X86::TRAP));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
DispatchBB->addSuccessor(DispContBB);
// Insert MBBs.
MF->push_back(DispatchBB);
MF->push_back(DispContBB);
MF->push_back(TrapBB);
// Insert code into the entry block that creates and registers the function
// context.
SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI);
// Create the jump table and associated information
unsigned JTE = getJumpTableEncoding();
MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
const X86RegisterInfo &RI = TII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
// registers being marked as clobbered.
if (RI.hasBasePointer(*MF)) {
const bool FPIs64Bit =
Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setRestoreBasePointer(MF);
Register FP = RI.getFrameRegister(*MF);
Register BP = RI.getBaseRegister();
unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true,
MFI->getRestoreBasePointerOffset())
.addRegMask(RI.getNoPreservedMask());
} else {
BuildMI(DispatchBB, DL, TII->get(X86::NOOP))
.addRegMask(RI.getNoPreservedMask());
}
// IReg is used as an index in a memory operand and therefore can't be SP
Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);
addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI,
Subtarget.is64Bit() ? 8 : 4);
BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
.addReg(IReg)
.addImm(LPadList.size());
BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE);
if (Subtarget.is64Bit()) {
Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass);
Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
// leaq .LJTI0_0(%rip), BReg
BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg)
.addReg(X86::RIP)
.addImm(1)
.addReg(0)
.addJumpTableIndex(MJTI)
.addReg(0);
// movzx IReg64, IReg
BuildMI(DispContBB, DL, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)
.addImm(0)
.addReg(IReg)
.addImm(X86::sub_32bit);
switch (JTE) {
case MachineJumpTableInfo::EK_BlockAddress:
// jmpq *(BReg,IReg64,8)
BuildMI(DispContBB, DL, TII->get(X86::JMP64m))
.addReg(BReg)
.addImm(8)
.addReg(IReg64)
.addImm(0)
.addReg(0);
break;
case MachineJumpTableInfo::EK_LabelDifference32: {
Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass);
Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass);
Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass);
// movl (BReg,IReg64,4), OReg
BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg)
.addReg(BReg)
.addImm(4)
.addReg(IReg64)
.addImm(0)
.addReg(0);
// movsx OReg64, OReg
BuildMI(DispContBB, DL, TII->get(X86::MOVSX64rr32), OReg64).addReg(OReg);
// addq BReg, OReg64, TReg
BuildMI(DispContBB, DL, TII->get(X86::ADD64rr), TReg)
.addReg(OReg64)
.addReg(BReg);
// jmpq *TReg
BuildMI(DispContBB, DL, TII->get(X86::JMP64r)).addReg(TReg);
break;
}
default:
llvm_unreachable("Unexpected jump table encoding");
}
} else {
// jmpl *.LJTI0_0(,IReg,4)
BuildMI(DispContBB, DL, TII->get(X86::JMP32m))
.addReg(0)
.addImm(4)
.addReg(IReg)
.addJumpTableIndex(MJTI)
.addReg(0);
}
// Add the jump table entries as successors to the MBB.
SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
for (auto &LP : LPadList)
if (SeenMBBs.insert(LP).second)
DispContBB->addSuccessor(LP);
// N.B. the order the invoke BBs are processed in doesn't matter here.
SmallVector<MachineBasicBlock *, 64> MBBLPads;
const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
for (MachineBasicBlock *MBB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
// Keep a copy of Successors since it's modified inside the loop.
SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
MBB->succ_rend());
// FIXME: Avoid quadratic complexity.
for (auto *MBBS : Successors) {
if (MBBS->isEHPad()) {
MBB->removeSuccessor(MBBS);
MBBLPads.push_back(MBBS);
}
}
MBB->addSuccessor(DispatchBB);
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents code from
// moving instructions to before the EH block, where they will never be
// executed.
for (auto &II : reverse(*MBB)) {
if (!II.isCall())
continue;
DenseMap<unsigned, bool> DefRegs;
for (auto &MOp : II.operands())
if (MOp.isReg())
DefRegs[MOp.getReg()] = true;
MachineInstrBuilder MIB(*MF, &II);
for (unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) {
unsigned Reg = SavedRegs[RegIdx];
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
break;
}
}
// Mark all former landing pads as non-landing pads. The dispatch is the only
// landing pad now.
for (auto &LP : MBBLPads)
LP->setIsEHPad(false);
// The instruction is gone now.
MI.eraseFromParent();
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
auto TMMImmToTMMReg = [](unsigned Imm) {
assert(Imm < 8 && "Illegal tmm index");
return X86::TMM0 + Imm;
};
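// e.g. TMMImmToTMMReg(3) == X86::TMM3, relying on the TMM registers being
// numbered consecutively in the register enum.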
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected instr type to insert");
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_addrX32:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
case X86::TLS_base_addrX32:
return EmitLoweredTLSAddr(MI, BB);
case X86::INDIRECT_THUNK_CALL32:
case X86::INDIRECT_THUNK_CALL64:
case X86::INDIRECT_THUNK_TCRETURN32:
case X86::INDIRECT_THUNK_TCRETURN64:
return EmitLoweredIndirectThunk(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
case X86::PROBED_ALLOCA_32:
case X86::PROBED_ALLOCA_64:
return EmitLoweredProbedAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_FR16:
case X86::CMOV_FR16X:
case X86::CMOV_FR32:
case X86::CMOV_FR32X:
case X86::CMOV_FR64:
case X86::CMOV_FR64X:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
case X86::CMOV_VR64:
case X86::CMOV_VR128:
case X86::CMOV_VR128X:
case X86::CMOV_VR256:
case X86::CMOV_VR256X:
case X86::CMOV_VR512:
case X86::CMOV_VK1:
case X86::CMOV_VK2:
case X86::CMOV_VK4:
case X86::CMOV_VK8:
case X86::CMOV_VK16:
case X86::CMOV_VK32:
case X86::CMOV_VK64:
return EmitLoweredSelect(MI, BB);
case X86::RDFLAGS32:
case X86::RDFLAGS64: {
unsigned PushF =
MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
// Permit reads of the EFLAGS and DF registers without them being defined.
// This intrinsic exists to read external processor state in flags, such as
// the trap flag, interrupt flag, and direction flag, none of which are
// modeled by the backend.
assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
"Unexpected register in operand!");
Push->getOperand(2).setIsUndef();
assert(Push->getOperand(3).getReg() == X86::DF &&
"Unexpected register in operand!");
Push->getOperand(3).setIsUndef();
BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::WRFLAGS32:
case X86::WRFLAGS64: {
unsigned Push =
MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
unsigned PopF =
MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
BuildMI(*BB, MI, DL, TII->get(PopF));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::FP80_ADDr:
case X86::FP80_ADDm32: {
// Change the floating point control register to use double extended
// precision when performing the addition.
int OrigCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)),
OrigCWFrameIdx);
// Load the old value of the control word...
Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
OrigCWFrameIdx);
// OR 0b11 into bits 8 and 9. 0b11 is the encoding for double extended
// precision.
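// For reference (x87 FPU control word): bits 8-9 are the precision-control
// field - 0b00 = 24-bit, 0b10 = 53-bit, 0b11 = 64-bit significand. The
// power-on default CW of 0x037F already has PC = 0b11, so this only changes
// behavior when the environment narrowed it (some C runtimes default to
// 53-bit precision).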
Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
.addReg(OldCW, RegState::Kill)
.addImm(0x300);
// Extract to 16 bits.
Register NewCW16 =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
.addReg(NewCW, RegState::Kill, X86::sub_16bit);
// Prepare memory for FLDCW.
int NewCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
NewCWFrameIdx)
.addReg(NewCW16, RegState::Kill);
// Reload the modified control word now...
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
NewCWFrameIdx);
// Do the addition.
if (MI.getOpcode() == X86::FP80_ADDr) {
BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.add(MI.getOperand(2));
} else {
BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80m32))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
.add(MI.getOperand(4))
.add(MI.getOperand(5))
.add(MI.getOperand(6));
}
// Reload the original control word now.
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
OrigCWFrameIdx);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
case X86::FP64_TO_INT16_IN_MEM:
case X86::FP64_TO_INT32_IN_MEM:
case X86::FP64_TO_INT64_IN_MEM:
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
int OrigCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);
// Load the old value of the control word...
Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
OrigCWFrameIdx);
// OR 0b11 into bits 10 and 11. 0b11 is the encoding for round toward zero.
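// For reference, the rounding-control field encodings are: 0b00 = round to
// nearest even, 0b01 = round down, 0b10 = round up, 0b11 = round toward
// zero (truncate), which matches C's float-to-integer cast semantics.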
Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
.addReg(OldCW, RegState::Kill).addImm(0xC00);
// Extract to 16 bits.
Register NewCW16 =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
.addReg(NewCW, RegState::Kill, X86::sub_16bit);
// Prepare memory for FLDCW.
int NewCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
NewCWFrameIdx)
.addReg(NewCW16, RegState::Kill);
// Reload the modified control word now...
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), NewCWFrameIdx);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
}
X86AddressMode AM = getAddressFromInstr(&MI, 0);
addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
.addReg(MI.getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), OrigCWFrameIdx);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
// xbegin
case X86::XBEGIN:
return emitXBegin(MI, BB, Subtarget.getInstrInfo());
case X86::VAARG_64:
case X86::VAARG_X32:
return EmitVAARGWithCustomInserter(MI, BB);
case X86::EH_SjLj_SetJmp32:
case X86::EH_SjLj_SetJmp64:
return emitEHSjLjSetJmp(MI, BB);
case X86::EH_SjLj_LongJmp32:
case X86::EH_SjLj_LongJmp64:
return emitEHSjLjLongJmp(MI, BB);
case X86::Int_eh_sjlj_setup_dispatch:
return EmitSjLjDispatchBlock(MI, BB);
case TargetOpcode::STATEPOINT:
// As an implementation detail, STATEPOINT shares the STACKMAP format at
// this point in the process. We diverge later.
return emitPatchPoint(MI, BB);
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case TargetOpcode::PATCHABLE_EVENT_CALL:
case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
return BB;
case X86::LCMPXCHG8B: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
// In addition to the four E[ABCD] registers implied by the encoding,
// CMPXCHG8B requires a memory operand. If the current architecture is
// i686 and the current function needs a base pointer - which is ESI on
// i686 - the register allocator would not be able to allocate registers
// for an address of the form X(%reg, %reg, Y): there would never be
// enough unreserved registers during regalloc (without the need for a
// base pointer the only option would be X(%edi, %esi, Y)).
// We give the register allocator a hand by precomputing the address in
// a new vreg using LEA.
// If this is not i686 or there is no base pointer, there is nothing to do.
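// Illustrative sketch of the rewrite performed below:
//   cmpxchg8b X(%esi,%reg,Y)      ; needs two free GPRs at the cmpxchg8b
// becomes
//   leal X(%esi,%reg,Y), %vreg    ; placed above the E[ABCD] copies,
//   cmpxchg8b (%vreg)             ; so only one GPR is live across it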
if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
return BB;
// Even though this code does not necessarily need the base pointer to
// be ESI, we check for that. The reason: if this assert fails, something
// has changed in the compiler's base pointer handling, which most
// probably has to be addressed here as well.
assert(TRI->getBaseRegister() == X86::ESI &&
"LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
"base pointer in mind");
MachineRegisterInfo &MRI = MF->getRegInfo();
MVT SPTy = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
Register computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
X86AddressMode AM = getAddressFromInstr(&MI, 0);
// Regalloc does not need any help when the memory operand of CMPXCHG8B
// does not use index register.
if (AM.IndexReg == X86::NoRegister)
return BB;
// After X86TargetLowering::ReplaceNodeResults, CMPXCHG8B is glued to its
// four operand definitions that are E[ABCD] registers. We skip them and
// then insert the LEA.
MachineBasicBlock::reverse_iterator RMBBI(MI.getReverseIterator());
while (RMBBI != BB->rend() && (RMBBI->definesRegister(X86::EAX) ||
RMBBI->definesRegister(X86::EBX) ||
RMBBI->definesRegister(X86::ECX) ||
RMBBI->definesRegister(X86::EDX))) {
++RMBBI;
}
MachineBasicBlock::iterator MBBI(RMBBI);
addFullAddress(
BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
setDirectAddressInInstr(&MI, 0, computedAddrVReg);
return BB;
}
case X86::LCMPXCHG16B_NO_RBX: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
Register BasePtr = TRI->getBaseRegister();
if (TRI->hasBasePointer(*MF) &&
(BasePtr == X86::RBX || BasePtr == X86::EBX)) {
if (!BB->isLiveIn(BasePtr))
BB->addLiveIn(BasePtr);
// Save RBX into a virtual register.
Register SaveRBX =
MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
.addReg(X86::RBX);
Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
MIB.add(MI.getOperand(Idx));
MIB.add(MI.getOperand(X86::AddrNumOperands));
MIB.addReg(SaveRBX);
} else {
// Simple case, just copy the virtual register to RBX.
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX)
.add(MI.getOperand(X86::AddrNumOperands));
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B));
for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
MIB.add(MI.getOperand(Idx));
}
MI.eraseFromParent();
return BB;
}
case X86::MWAITX: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
Register BasePtr = TRI->getBaseRegister();
bool IsRBX = (BasePtr == X86::RBX || BasePtr == X86::EBX);
// If there is no need to save the base pointer, we generate MWAITXrrr;
// otherwise we generate the pseudo MWAITX_SAVE_RBX.
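// Roughly, per the AMD APM: MWAITX takes its hints in EAX, extension flags
// in ECX (bit 1 enables the timer), and the optional timeout in EBX - the
// EBX operand is what can collide with a reserved base pointer.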
if (!IsRBX || !TRI->hasBasePointer(*MF)) {
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI.getOperand(0).getReg());
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI.getOperand(1).getReg());
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EBX)
.addReg(MI.getOperand(2).getReg());
BuildMI(*BB, MI, DL, TII->get(X86::MWAITXrrr));
MI.eraseFromParent();
} else {
if (!BB->isLiveIn(BasePtr)) {
BB->addLiveIn(BasePtr);
}
// Parameters can be copied into ECX and EAX but not EBX yet.
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI.getOperand(0).getReg());
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI.getOperand(1).getReg());
assert(Subtarget.is64Bit() && "Expected 64-bit mode!");
// Save RBX into a virtual register.
Register SaveRBX =
MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
.addReg(X86::RBX);
// Generate mwaitx pseudo.
Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
BuildMI(*BB, MI, DL, TII->get(X86::MWAITX_SAVE_RBX))
.addDef(Dst) // Destination tied in with SaveRBX.
.addReg(MI.getOperand(2).getReg()) // input value of EBX.
.addUse(SaveRBX); // Save of base pointer.
MI.eraseFromParent();
}
return BB;
}
case TargetOpcode::PREALLOCATED_SETUP: {
assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setHasPreallocatedCall(true);
int64_t PreallocatedId = MI.getOperand(0).getImm();
size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
assert(StackAdjustment != 0 && "0 stack adjustment");
LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
<< StackAdjustment << "\n");
BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
.addReg(X86::ESP)
.addImm(StackAdjustment);
MI.eraseFromParent();
return BB;
}
case TargetOpcode::PREALLOCATED_ARG: {
assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
int64_t PreallocatedId = MI.getOperand(1).getImm();
int64_t ArgIdx = MI.getOperand(2).getImm();
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
<< ", arg offset " << ArgOffset << "\n");
// stack pointer + offset
addRegOffset(
BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
X86::ESP, false, ArgOffset);
MI.eraseFromParent();
return BB;
}
case X86::PTDPBSSD:
case X86::PTDPBSUD:
case X86::PTDPBUSD:
case X86::PTDPBUUD:
case X86::PTDPBF16PS:
case X86::PTDPFP16PS: {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(2).getImm()), RegState::Undef);
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::PTILEZERO: {
unsigned Imm = MI.getOperand(0).getImm();
BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::PTILELOADD:
case X86::PTILELOADDT1:
case X86::PTILESTORED: {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PTILELOADD: Opc = X86::TILELOADD; break;
case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
case X86::PTILESTORED: Opc = X86::TILESTORED; break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
unsigned CurOp = 0;
if (Opc != X86::TILESTORED)
MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
RegState::Define);
MIB.add(MI.getOperand(CurOp++)); // base
MIB.add(MI.getOperand(CurOp++)); // scale
MIB.add(MI.getOperand(CurOp++)); // index -- stride
MIB.add(MI.getOperand(CurOp++)); // displacement
MIB.add(MI.getOperand(CurOp++)); // segment
if (Opc == X86::TILESTORED)
MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
RegState::Undef);
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
}
}
//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
bool
X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
unsigned EltSize = VT.getScalarSizeInBits();
if (VT.isVector()) {
// If the constant is only all sign bits in the active bits, then we should
// extend it to the entire constant to allow it to act as a boolean constant
// vector.
auto NeedsSignExtension = [&](SDValue V, unsigned ActiveBits) {
if (!ISD::isBuildVectorOfConstantSDNodes(V.getNode()))
return false;
for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
if (!DemandedElts[i] || V.getOperand(i).isUndef())
continue;
const APInt &Val = V.getConstantOperandAPInt(i);
if (Val.getBitWidth() > Val.getNumSignBits() &&
Val.trunc(ActiveBits).getNumSignBits() == ActiveBits)
return true;
}
return false;
};
// For vectors - if we have a constant, then try to sign extend.
// TODO: Handle AND/ANDN cases.
unsigned ActiveBits = DemandedBits.getActiveBits();
if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) &&
(Opcode == ISD::OR || Opcode == ISD::XOR) &&
NeedsSignExtension(Op.getOperand(1), ActiveBits)) {
EVT ExtSVT = EVT::getIntegerVT(*TLO.DAG.getContext(), ActiveBits);
EVT ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtSVT,
VT.getVectorNumElements());
SDValue NewC =
TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT,
Op.getOperand(1), TLO.DAG.getValueType(ExtVT));
SDValue NewOp =
TLO.DAG.getNode(Opcode, SDLoc(Op), VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
return false;
}
// Only optimize Ands to prevent shrinking a constant that could be
// matched by movzx.
if (Opcode != ISD::AND)
return false;
// Make sure the RHS really is a constant.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!C)
return false;
const APInt &Mask = C->getAPIntValue();
// Clear all non-demanded bits initially.
APInt ShrunkMask = Mask & DemandedBits;
// Find the width of the shrunk mask.
unsigned Width = ShrunkMask.getActiveBits();
// If the mask is all 0s there's nothing to do here.
if (Width == 0)
return false;
// Find the next power of 2 width, rounding up to a byte.
Width = PowerOf2Ceil(std::max(Width, 8U));
// Truncate the width to size to handle illegal types.
Width = std::min(Width, EltSize);
// Calculate a possible zero extend mask for this constant.
APInt ZeroExtendMask = APInt::getLowBitsSet(EltSize, Width);
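// e.g. Mask = 0x1FF with DemandedBits = 0xFF shrinks to ZeroExtendMask =
// 0xFF, which MOVZX can then match.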
// If we aren't changing the mask, just return true to keep it and prevent
// the caller from optimizing.
if (ZeroExtendMask == Mask)
return true;
// Make sure the new mask can be represented by a combination of mask bits
// and non-demanded bits.
if (!ZeroExtendMask.isSubsetOf(Mask | ~DemandedBits))
return false;
// Replace the constant with the zero extend mask.
SDLoc DL(Op);
SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT);
SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = Known.getBitWidth();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
switch (Opc) {
default: break;
case X86ISD::SETCC:
Known.Zero.setBitsFrom(1);
break;
case X86ISD::MOVMSK: {
unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
Known.Zero.setBitsFrom(NumLoBits);
break;
}
case X86ISD::PEXTRB:
case X86ISD::PEXTRW: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
Op.getConstantOperandVal(1));
Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
Known = Known.anyextOrTrunc(BitWidth);
Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
break;
}
case X86ISD::VSRAI:
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
unsigned ShAmt = Op.getConstantOperandVal(1);
if (ShAmt >= VT.getScalarSizeInBits()) {
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
if (Opc != X86ISD::VSRAI) {
Known.setAllZero();
break;
}
ShAmt = VT.getScalarSizeInBits() - 1;
}
Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Opc == X86ISD::VSHLI) {
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits are known zero.
Known.Zero.setLowBits(ShAmt);
} else if (Opc == X86ISD::VSRLI) {
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits are known zero.
Known.Zero.setHighBits(ShAmt);
} else {
Known.Zero.ashrInPlace(ShAmt);
Known.One.ashrInPlace(ShAmt);
}
break;
}
case X86ISD::PACKUS: {
// PACKUS is just a truncation if the upper half is zero.
APInt DemandedLHS, DemandedRHS;
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
Known.One = APInt::getAllOnes(BitWidth * 2);
Known.Zero = APInt::getAllOnes(BitWidth * 2);
KnownBits Known2;
if (!!DemandedLHS) {
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
Known = KnownBits::commonBits(Known, Known2);
}
if (!!DemandedRHS) {
Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
Known = KnownBits::commonBits(Known, Known2);
}
if (Known.countMinLeadingZeros() < BitWidth)
Known.resetAll();
Known = Known.trunc(BitWidth);
break;
}
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
if (!Src.getSimpleValueType().isVector()) {
Known = DAG.computeKnownBits(Src, Depth + 1);
return;
}
break;
}
case X86ISD::AND: {
if (Op.getResNo() == 0) {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known &= Known2;
}
break;
}
case X86ISD::ANDNP: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// ANDNP = (~X & Y);
Known.One &= Known2.Zero;
Known.Zero |= Known2.One;
break;
}
case X86ISD::FOR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known |= Known2;
break;
}
case X86ISD::PSADBW: {
assert(VT.getScalarType() == MVT::i64 &&
Op.getOperand(0).getValueType().getScalarType() == MVT::i8 &&
"Unexpected PSADBW types");
// PSADBW - fills low 16 bits and zeros upper 48 bits of each i64 result.
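// (Each i64 element is a sum of eight byte absolute differences, at most
// 8 * 255 = 2040 < 2^16, so the upper 48 bits are always zero.)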
Known.Zero.setBitsFrom(16);
break;
}
case X86ISD::PMULUDQ: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = Known.trunc(BitWidth / 2).zext(BitWidth);
Known2 = Known2.trunc(BitWidth / 2).zext(BitWidth);
Known = KnownBits::mul(Known, Known2);
break;
}
case X86ISD::CMOV: {
Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
}
case X86ISD::BEXTR:
case X86ISD::BEXTRI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
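// The BEXTR control operand packs the start offset in bits 7:0 and the
// length in bits 15:8; the result is (Src >> Shift) & ((1 << Length) - 1),
// zero-extended to the full width.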
// If the length is 0, the result is 0.
if (Length == 0) {
Known.setAllZero();
break;
}
if ((Shift + Length) <= BitWidth) {
Known = DAG.computeKnownBits(Op0, Depth + 1);
Known = Known.extractBits(Length, Shift);
Known = Known.zextOrTrunc(BitWidth);
}
}
break;
}
case X86ISD::PDEP: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Zeros are retained from the mask operand, but ones are not.
Known.One.clearAllBits();
// The result will have at least as many trailing zeros as the non-mask
// operand since bits can only map to the same or higher bit position.
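// e.g. PDEP(Src = 0b101, Mask = 0b11010) scatters the low source bits
// into the set mask positions, giving 0b10010.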
Known.Zero.setLowBits(Known2.countMinTrailingZeros());
break;
}
case X86ISD::PEXT: {
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// The result has as many leading zeros as the number of zeros in the mask.
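// e.g. PEXT(Src = 0b10010, Mask = 0b11010) gathers the masked bits to the
// bottom, giving 0b101.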
unsigned Count = Known.Zero.countPopulation();
Known.Zero = APInt::getHighBitsSet(BitWidth, Count);
Known.One.clearAllBits();
break;
}
case X86ISD::VTRUNC:
case X86ISD::VTRUNCS:
case X86ISD::VTRUNCUS:
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P:
case X86ISD::CVTP2SI:
case X86ISD::CVTP2UI:
case X86ISD::MCVTP2SI:
case X86ISD::MCVTP2UI:
case X86ISD::CVTTP2SI:
case X86ISD::CVTTP2UI:
case X86ISD::MCVTTP2SI:
case X86ISD::MCVTTP2UI:
case X86ISD::MCVTSI2P:
case X86ISD::MCVTUI2P:
case X86ISD::VFPROUND:
case X86ISD::VMFPROUND:
case X86ISD::CVTPS2PH:
case X86ISD::MCVTPS2PH: {
// Truncations/Conversions - upper elements are known zero.
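// e.g. CVTTP2SI from v2f64 to v4i32 only writes the low two result
// elements, so if only the upper elements are demanded the value is known
// zero.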
EVT SrcVT = Op.getOperand(0).getValueType();
if (SrcVT.isVector()) {
unsigned NumSrcElts = SrcVT.getVectorNumElements();
if (NumElts > NumSrcElts &&
DemandedElts.countTrailingZeros() >= NumSrcElts)
Known.setAllZero();
}
break;
}
case X86ISD::STRICT_CVTTP2SI:
case X86ISD::STRICT_CVTTP2UI:
case X86ISD::STRICT_CVTSI2P:
case X86ISD::STRICT_CVTUI2P:
case X86ISD::STRICT_VFPROUND:
case X86ISD::STRICT_CVTPS2PH: {
// Strict Conversions - upper elements are known zero.
EVT SrcVT = Op.getOperand(1).getValueType();
if (SrcVT.isVector()) {
unsigned NumSrcElts = SrcVT.getVectorNumElements();
if (NumElts > NumSrcElts &&
DemandedElts.countTrailingZeros() >= NumSrcElts)
Known.setAllZero();
}
break;
}
case X86ISD::MOVQ2DQ: {
// Move from MMX to XMM. Upper half of XMM should be 0.
if (DemandedElts.countTrailingZeros() >= (NumElts / 2))
Known.setAllZero();
break;
}
case X86ISD::VBROADCAST_LOAD: {
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
if (getTargetConstantBitsFromNode(Op, BitWidth, UndefElts, EltBits,
/*AllowWholeUndefs*/ false,
/*AllowPartialUndefs*/ false)) {
Known.Zero.setAllBits();
Known.One.setAllBits();
for (unsigned I = 0; I != NumElts; ++I) {
if (!DemandedElts[I])
continue;
if (UndefElts[I]) {
Known.resetAll();
break;
}
KnownBits Known2 = KnownBits::makeConstant(EltBits[I]);
Known = KnownBits::commonBits(Known, Known2);
}
return;
}
break;
}
}
// Handle target shuffles.
// TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
if (isTargetShuffle(Opc)) {
SmallVector<int, 64> Mask;
SmallVector<SDValue, 2> Ops;
if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask)) {
unsigned NumOps = Ops.size();
unsigned NumElts = VT.getVectorNumElements();
if (Mask.size() == NumElts) {
SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
int M = Mask[i];
if (M == SM_SentinelUndef) {
// For UNDEF elements, we don't know anything about the common state
// of the shuffle result.
Known.resetAll();
break;
}
if (M == SM_SentinelZero) {
Known.One.clearAllBits();
continue;
}
assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
"Shuffle index out of range");
unsigned OpIdx = (unsigned)M / NumElts;
unsigned EltIdx = (unsigned)M % NumElts;
if (Ops[OpIdx].getValueType() != VT) {
// TODO - handle target shuffle ops with different value types.
Known.resetAll();
break;
}
DemandedOps[OpIdx].setBit(EltIdx);
}
// Known bits are the values that are shared by every demanded element.
for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) {
if (!DemandedOps[i])
continue;
KnownBits Known2 =
DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
Known = KnownBits::commonBits(Known, Known2);
}
}
}
}
}
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
EVT VT = Op.getValueType();
unsigned VTBits = VT.getScalarSizeInBits();
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case X86ISD::SETCC_CARRY:
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
return VTBits;
case X86ISD::VTRUNC: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
unsigned NumSrcBits = SrcVT.getScalarSizeInBits();
assert(VTBits < NumSrcBits && "Illegal truncation input type");
APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
if (Tmp > (NumSrcBits - VTBits))
return Tmp - (NumSrcBits - VTBits);
return 1;
}
case X86ISD::PACKSS: {
// PACKSS is just a truncation if the sign bits extend to the packed size.
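// e.g. an i32 source with 20 sign bits packs to an i16 element with
// 20 - (32 - 16) = 4 sign bits.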
APInt DemandedLHS, DemandedRHS;
getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS,
DemandedRHS);
unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits();
unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
if (!!DemandedLHS)
Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
if (!!DemandedRHS)
Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
unsigned Tmp = std::min(Tmp0, Tmp1);
if (Tmp > (SrcBits - VTBits))
return Tmp - (SrcBits - VTBits);
return 1;
}
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
if (!Src.getSimpleValueType().isVector())
return DAG.ComputeNumSignBits(Src, Depth + 1);
break;
}
case X86ISD::VSHLI: {
SDValue Src = Op.getOperand(0);
const APInt &ShiftVal = Op.getConstantOperandAPInt(1);
if (ShiftVal.uge(VTBits))
return VTBits; // Shifted all bits out --> zero.
unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
if (ShiftVal.uge(Tmp))
return 1; // Shifted all sign bits out --> unknown.
return Tmp - ShiftVal.getZExtValue();
}
case X86ISD::VSRAI: {
SDValue Src = Op.getOperand(0);
APInt ShiftVal = Op.getConstantOperandAPInt(1);
if (ShiftVal.uge(VTBits - 1))
return VTBits; // Sign splat.
unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
ShiftVal += Tmp;
return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
case X86ISD::FSETCC:
// cmpss/cmpsd return zero/all-bits result values in the bottom element.
if (VT == MVT::f32 || VT == MVT::f64 ||
((VT == MVT::v4f32 || VT == MVT::v2f64) && DemandedElts == 1))
return VTBits;
break;
case X86ISD::PCMPGT:
case X86ISD::PCMPEQ:
case X86ISD::CMPP:
case X86ISD::VPCOM:
case X86ISD::VPCOMU:
// Vector compares return zero/all-bits result values.
return VTBits;
case X86ISD::ANDNP: {
unsigned Tmp0 =
DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Tmp0 == 1) return 1; // Early out.
unsigned Tmp1 =
DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
return std::min(Tmp0, Tmp1);
}
case X86ISD::CMOV: {
unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp0 == 1) return 1; // Early out.
unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
return std::min(Tmp0, Tmp1);
}
}
// Handle target shuffles.
// TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
if (isTargetShuffle(Opcode)) {
SmallVector<int, 64> Mask;
SmallVector<SDValue, 2> Ops;
if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask)) {
unsigned NumOps = Ops.size();
unsigned NumElts = VT.getVectorNumElements();
if (Mask.size() == NumElts) {
SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
int M = Mask[i];
if (M == SM_SentinelUndef) {
// For UNDEF elements, we don't know anything about the common state
// of the shuffle result.
return 1;
} else if (M == SM_SentinelZero) {
// Zero = all sign bits.
continue;
}
assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
"Shuffle index out of range");
unsigned OpIdx = (unsigned)M / NumElts;
unsigned EltIdx = (unsigned)M % NumElts;
if (Ops[OpIdx].getValueType() != VT) {
// TODO - handle target shuffle ops with different value types.
return 1;
}
DemandedOps[OpIdx].setBit(EltIdx);
}
unsigned Tmp0 = VTBits;
for (unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
if (!DemandedOps[i])
continue;
unsigned Tmp1 =
DAG.ComputeNumSignBits(Ops[i], DemandedOps[i], Depth + 1);
Tmp0 = std::min(Tmp0, Tmp1);
}
return Tmp0;
}
}
}
// Fallback case.
return 1;
}
SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP)
return N->getOperand(0);
return N;
}
// Helper to look for a normal load that can be narrowed into a vzload with the
// specified VT and memory VT. Returns SDValue() on failure.
static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT,
SelectionDAG &DAG) {
// Can't if the load is volatile or atomic.
if (!LN->isSimple())
return SDValue();
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
return DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, SDLoc(LN), Tys, Ops, MemVT,
LN->getPointerInfo(), LN->getOriginalAlign(),
LN->getMemOperand()->getFlags());
}
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
SDValue V1, const SelectionDAG &DAG,
const X86Subtarget &Subtarget, unsigned &Shuffle,
MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
// Match against a VZEXT_MOVL vXi32 and vXi16 zero-extending instruction.
if (Mask[0] == 0 &&
(MaskEltSize == 32 || (MaskEltSize == 16 && Subtarget.hasFP16()))) {
if ((isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) ||
(V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
Shuffle = X86ISD::VZEXT_MOVL;
if (MaskEltSize == 16)
SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
else
SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
}
// Match against an ANY/ZERO_EXTEND_VECTOR_INREG instruction.
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
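// e.g. a v16i8 mask <0,Z,1,Z,2,Z,...> (Scale = 2) matches
// ZERO_EXTEND_VECTOR_INREG (i.e. PMOVZXBW).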
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
(MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
bool MatchAny = true;
bool MatchZero = true;
unsigned NumDstElts = NumMaskElts / Scale;
for (unsigned i = 0; i != NumDstElts && (MatchAny || MatchZero); ++i) {
if (!isUndefOrEqual(Mask[i * Scale], (int)i)) {
MatchAny = MatchZero = false;
break;
}
MatchAny &= isUndefInRange(Mask, (i * Scale) + 1, Scale - 1);
MatchZero &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
}
if (MatchAny || MatchZero) {
assert(MatchZero && "Failed to match zext but matched aext?");
unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
MVT::getIntegerVT(MaskEltSize);
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
if (SrcVT.getVectorNumElements() != NumDstElts)
Shuffle = DAG.getOpcode_EXTEND_VECTOR_INREG(Shuffle);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
return true;
}
}
}
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
(MaskEltSize == 16 && Subtarget.hasFP16())) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
if (MaskEltSize == 16)
SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
else
SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
// Check if we have SSE3, which will let us use MOVDDUP etc. These
// instructions are no slower than UNPCKLPD but have the option to
// fold the input operand into even an unaligned memory load.
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
return true;
}
if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, DAG, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
}
if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v4f64;
return true;
}
if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, DAG,
V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
}
if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v8f64;
return true;
}
if (isTargetShuffleEquivalent(
MaskVT, Mask,
{0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, DAG, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
if (isTargetShuffleEquivalent(
MaskVT, Mask,
{1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, DAG, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
}
return false;
}
// Attempt to match a combined shuffle mask against supported unary immediate
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain, bool AllowIntDomain,
const SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
unsigned InputSizeInBits = MaskVT.getSizeInBits();
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
bool ContainsZeros = isAnyZero(Mask);
// Handle VPERMI/VPERMILPD vXi64/vXf64 patterns.
if (!ContainsZeros && MaskScalarSizeInBits == 64) {
// Check for lane crossing permutes.
if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
// PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) {
Shuffle = X86ISD::VPERMI;
ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
PermuteImm = getV4X86ShuffleImm(Mask);
return true;
}
if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) {
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
Shuffle = X86ISD::VPERMI;
ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
PermuteImm = getV4X86ShuffleImm(RepeatedMask);
return true;
}
}
} else if (AllowFloatDomain && Subtarget.hasAVX()) {
// VPERMILPD can permute with a non-repeating shuffle.
Shuffle = X86ISD::VPERMILPI;
ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
PermuteImm = 0;
for (int i = 0, e = Mask.size(); i != e; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
PermuteImm |= (M & 1) << i;
}
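// e.g. the in-lane swap mask <1,0,3,2> yields PermuteImm = 0b0101.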
return true;
}
}
// Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
// AVX introduced the VPERMILPD/VPERMILPS float permutes; before then we
// had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
!ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
// Narrow the repeated mask to create 32-bit element permutes.
SmallVector<int, 4> WordMask = RepeatedMask;
if (MaskScalarSizeInBits == 64)
narrowShuffleMaskElts(2, RepeatedMask, WordMask);
Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
PermuteImm = getV4X86ShuffleImm(WordMask);
return true;
}
}
// Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 &&
((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
// PSHUFLW: permute lower 4 elements only.
if (isUndefOrInRange(LoMask, 0, 4) &&
isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
Shuffle = X86ISD::PSHUFLW;
ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
PermuteImm = getV4X86ShuffleImm(LoMask);
return true;
}
// PSHUFHW: permute upper 4 elements only.
if (isUndefOrInRange(HiMask, 4, 8) &&
isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
// Offset the HiMask so that we can create the shuffle immediate.
int OffsetHiMask[4];
for (int i = 0; i != 4; ++i)
OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);
Shuffle = X86ISD::PSHUFHW;
ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
return true;
}
}
}
// Attempt to match against byte/bit shifts.
if (AllowIntDomain &&
((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits,
Mask, 0, Zeroable, Subtarget);
if (0 < ShiftAmt && (!ShuffleVT.is512BitVector() || Subtarget.hasBWI() ||
32 <= ShuffleVT.getScalarSizeInBits())) {
PermuteImm = (unsigned)ShiftAmt;
return true;
}
}
// Attempt to match against bit rotates.
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 &&
((MaskVT.is128BitVector() && Subtarget.hasXOP()) ||
Subtarget.hasAVX512())) {
int RotateAmt = matchShuffleAsBitRotate(ShuffleVT, MaskScalarSizeInBits,
Subtarget, Mask);
if (0 < RotateAmt) {
Shuffle = X86ISD::VROTLI;
PermuteImm = (unsigned)RotateAmt;
return true;
}
}
return false;
}
// Attempt to match a combined unary shuffle mask against supported binary
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
SDValue &V1, SDValue &V2, const SDLoc &DL,
SelectionDAG &DAG, const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
bool IsUnary) {
unsigned NumMaskElts = Mask.size();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
unsigned SizeInBits = MaskVT.getSizeInBits();
if (MaskVT.is128BitVector()) {
if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG) &&
AllowFloatDomain) {
V2 = V1;
V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1);
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS;
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}, DAG) &&
AllowFloatDomain) {
V2 = V1;
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS;
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}, DAG) &&
Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
SrcVT = DstVT = MVT::v2f64;
return true;
}
if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG) &&
(AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
SrcVT = DstVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7},
DAG) &&
Subtarget.hasFP16()) {
Shuffle = X86ISD::MOVSH;
SrcVT = DstVT = MVT::v8f16;
return true;
}
}
// Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
if (matchShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
Subtarget)) {
DstVT = MaskVT;
return true;
}
}
// Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
(MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
if (matchShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL, DAG,
Subtarget)) {
SrcVT = DstVT = MaskVT;
if (MaskVT.is256BitVector() && !Subtarget.hasAVX2())
SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
return true;
}
}
// Attempt to match against an OR if we're performing a blend shuffle and the
// non-blended source element is zero in each case.
// TODO: Handle cases where V1/V2 sizes don't match SizeInBits.
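// e.g. for the blend mask <0,5,2,7> (NumMaskElts = 4), elements 1 and 3 of
// V1 and elements 0 and 2 of V2 must be zero for V1 | V2 to be a valid
// replacement.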
if (SizeInBits == V1.getValueSizeInBits() &&
SizeInBits == V2.getValueSizeInBits() &&
(EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
(EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
bool IsBlend = true;
unsigned NumV1Elts = V1.getValueType().getVectorNumElements();
unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
unsigned Scale1 = NumV1Elts / NumMaskElts;
unsigned Scale2 = NumV2Elts / NumMaskElts;
APInt DemandedZeroV1 = APInt::getZero(NumV1Elts);
APInt DemandedZeroV2 = APInt::getZero(NumV2Elts);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
if (M == SM_SentinelZero) {
DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
continue;
}
if (M == (int)i) {
DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
continue;
}
if (M == (int)(i + NumMaskElts)) {
DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
continue;
}
IsBlend = false;
break;
}
if (IsBlend) {
if (DAG.MaskedVectorIsZero(V1, DemandedZeroV1) &&
DAG.MaskedVectorIsZero(V2, DemandedZeroV2)) {
Shuffle = ISD::OR;
SrcVT = DstVT = MaskVT.changeTypeToInteger();
return true;
}
if (NumV1Elts == NumV2Elts && NumV1Elts == NumMaskElts) {
// FIXME: handle mismatched sizes?
// TODO: investigate if `ISD::OR` handling in
// `TargetLowering::SimplifyDemandedVectorElts` can be improved instead.
auto computeKnownBitsElementWise = [&DAG](SDValue V) {
unsigned NumElts = V.getValueType().getVectorNumElements();
KnownBits Known(NumElts);
for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
APInt Mask = APInt::getOneBitSet(NumElts, EltIdx);
KnownBits PeepholeKnown = DAG.computeKnownBits(V, Mask);
if (PeepholeKnown.isZero())
Known.Zero.setBit(EltIdx);
if (PeepholeKnown.isAllOnes())
Known.One.setBit(EltIdx);
}
return Known;
};
KnownBits V1Known = computeKnownBitsElementWise(V1);
KnownBits V2Known = computeKnownBitsElementWise(V2);
for (unsigned i = 0; i != NumMaskElts && IsBlend; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
if (M == SM_SentinelZero) {
IsBlend &= V1Known.Zero[i] && V2Known.Zero[i];
continue;
}
if (M == (int)i) {
IsBlend &= V2Known.Zero[i] || V1Known.One[i];
continue;
}
if (M == (int)(i + NumMaskElts)) {
IsBlend &= V1Known.Zero[i] || V2Known.One[i];
continue;
}
llvm_unreachable("will not get here.");
}
if (IsBlend) {
Shuffle = ISD::OR;
SrcVT = DstVT = MaskVT.changeTypeToInteger();
return true;
}
}
}
}
return false;
}
static bool matchBinaryPermuteShuffle(
MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable,
bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2,
const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
// Attempt to match against VALIGND/VALIGNQ rotate.
if (AllowIntDomain && (EltSizeInBits == 64 || EltSizeInBits == 32) &&
((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
(MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
if (!isAnyZero(Mask)) {
int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
if (0 < Rotation) {
Shuffle = X86ISD::VALIGN;
if (EltSizeInBits == 64)
ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
else
ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
PermuteImm = Rotation;
return true;
}
}
}
// Attempt to match against PALIGNR byte rotate.
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
int ByteRotation = matchShuffleAsByteRotate(MaskVT, V1, V2, Mask);
if (0 < ByteRotation) {
Shuffle = X86ISD::PALIGNR;
ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
PermuteImm = ByteRotation;
return true;
}
}
// Attempt to combine to X86ISD::BLENDI.
if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
(Subtarget.hasAVX() && MaskVT.is256BitVector()))) ||
(MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) {
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 8> TargetMask(Mask);
if (matchShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero,
ForceV2Zero, BlendMask)) {
if (MaskVT == MVT::v16i16) {
// We can only use v16i16 PBLENDW if the lanes are repeated.
SmallVector<int, 8> RepeatedMask;
if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask,
RepeatedMask)) {
assert(RepeatedMask.size() == 8 &&
"Repeated mask size doesn't match!");
PermuteImm = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 8)
PermuteImm |= 1 << i;
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
Shuffle = X86ISD::BLENDI;
ShuffleVT = MaskVT;
return true;
}
} else {
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
PermuteImm = (unsigned)BlendMask;
Shuffle = X86ISD::BLENDI;
ShuffleVT = MaskVT;
return true;
}
}
}
// Attempt to combine to INSERTPS, but only if it has elements that need to
// be set to zero.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
MaskVT.is128BitVector() && isAnyZero(Mask) &&
matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
ShuffleVT = MVT::v4f32;
return true;
}
// Attempt to combine to SHUFPD.
if (AllowFloatDomain && EltSizeInBits == 64 &&
((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
bool ForceV1Zero = false, ForceV2Zero = false;
if (matchShuffleWithSHUFPD(MaskVT, V1, V2, ForceV1Zero, ForceV2Zero,
PermuteImm, Mask, Zeroable)) {
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
Shuffle = X86ISD::SHUFP;
ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
return true;
}
}
// Attempt to combine to SHUFPS.
if (AllowFloatDomain && EltSizeInBits == 32 &&
((MaskVT.is128BitVector() && Subtarget.hasSSE1()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
SmallVector<int, 4> RepeatedMask;
if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) {
// Match each half of the repeated mask, to determine if it's just
// referencing one of the vectors, is zeroable, or is entirely undef.
auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) {
int M0 = RepeatedMask[Offset];
int M1 = RepeatedMask[Offset + 1];
if (isUndefInRange(RepeatedMask, Offset, 2)) {
return DAG.getUNDEF(MaskVT);
} else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) {
S0 = (SM_SentinelUndef == M0 ? -1 : 0);
S1 = (SM_SentinelUndef == M1 ? -1 : 1);
return getZeroVector(MaskVT, Subtarget, DAG, DL);
} else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) {
S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
return V1;
} else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) {
S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
return V2;
}
return SDValue();
};
int ShufMask[4] = {-1, -1, -1, -1};
SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);
if (Lo && Hi) {
V1 = Lo;
V2 = Hi;
Shuffle = X86ISD::SHUFP;
ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32);
PermuteImm = getV4X86ShuffleImm(ShufMask);
return true;
}
}
}
// Attempt to combine to INSERTPS more generally if X86ISD::SHUFP failed.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
MaskVT.is128BitVector() &&
matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
ShuffleVT = MVT::v4f32;
return true;
}
return false;
}
static SDValue combineX86ShuffleChainWithExtract(
ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
bool HasVariableMask, bool AllowVariableCrossLaneMask,
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget);
/// Combine an arbitrary chain of shuffles into a single instruction if
/// possible.
///
/// This is the leaf of the recursive combine below. When we have found some
/// chain of single-use x86 shuffle instructions and accumulated the combined
/// shuffle mask represented by them, this will try to pattern match that mask
/// into either a single instruction if there is a special purpose instruction
/// for this operation, or into a PSHUFB instruction which is a fully general
/// instruction but should only be used to replace chains over a certain depth.
static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ArrayRef<int> BaseMask, int Depth,
bool HasVariableMask,
bool AllowVariableCrossLaneMask,
bool AllowVariablePerLaneMask,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
"Unexpected number of shuffle inputs!");
SDLoc DL(Root);
MVT RootVT = Root.getSimpleValueType();
unsigned RootSizeInBits = RootVT.getSizeInBits();
unsigned NumRootElts = RootVT.getVectorNumElements();
// Canonicalize shuffle input op to the requested type.
auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) {
if (VT.getSizeInBits() > Op.getValueSizeInBits())
Op = widenSubVector(Op, false, Subtarget, DAG, DL, VT.getSizeInBits());
else if (VT.getSizeInBits() < Op.getValueSizeInBits())
Op = extractSubVector(Op, 0, DAG, DL, VT.getSizeInBits());
return DAG.getBitcast(VT, Op);
};
// Find the inputs that enter the chain. Note that multiple uses are OK
// here; we're not going to remove the operands we find.
bool UnaryShuffle = (Inputs.size() == 1);
SDValue V1 = peekThroughBitcasts(Inputs[0]);
SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType())
: peekThroughBitcasts(Inputs[1]));
MVT VT1 = V1.getSimpleValueType();
MVT VT2 = V2.getSimpleValueType();
assert((RootSizeInBits % VT1.getSizeInBits()) == 0 &&
(RootSizeInBits % VT2.getSizeInBits()) == 0 && "Vector size mismatch");
SDValue Res;
unsigned NumBaseMaskElts = BaseMask.size();
if (NumBaseMaskElts == 1) {
assert(BaseMask[0] == 0 && "Invalid shuffle index found!");
return CanonicalizeShuffleInput(RootVT, V1);
}
bool OptForSize = DAG.shouldOptForSize();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
(RootVT.isFloatingPoint() && Depth >= 1) ||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
// Don't combine if we are an AVX512/EVEX target and the mask element size
// is different from the root element size - this would prevent writemasks
// from being reused.
bool IsMaskedShuffle = false;
if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) {
if (Root.hasOneUse() && Root->use_begin()->getOpcode() == ISD::VSELECT &&
Root->use_begin()->getOperand(0).getScalarValueSizeInBits() == 1) {
IsMaskedShuffle = true;
}
}
// If we are shuffling a splat (and not introducing zeros) then we can just
// use it directly. This works for smaller elements as well, since they
// already repeat across each mask element.
if (UnaryShuffle && !isAnyZero(BaseMask) &&
V1.getValueSizeInBits() >= RootSizeInBits &&
(BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
DAG.isSplatValue(V1, /*AllowUndefs*/ false)) {
return CanonicalizeShuffleInput(RootVT, V1);
}
SmallVector<int, 64> Mask(BaseMask);
// See if the shuffle is a hidden identity shuffle - repeated args in HOPs
// etc. can be simplified.
if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits && VT1.isVector()) {
SmallVector<int> ScaledMask, IdentityMask;
unsigned NumElts = VT1.getVectorNumElements();
if (Mask.size() <= NumElts &&
scaleShuffleElements(Mask, NumElts, ScaledMask)) {
for (unsigned i = 0; i != NumElts; ++i)
IdentityMask.push_back(i);
if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, DAG, V1,
V2))
return CanonicalizeShuffleInput(RootVT, V1);
}
}
// Handle 128/256-bit lane shuffles of 512-bit vectors.
if (RootVT.is512BitVector() &&
(NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) {
// If the upper subvectors are zeroable, then an extract+insert is more
// optimal than using X86ISD::SHUF128. The insertion is free, even if it has
// to zero the upper subvectors.
if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
assert(isInRange(Mask[0], 0, NumBaseMaskElts) &&
"Unexpected lane shuffle");
Res = CanonicalizeShuffleInput(RootVT, V1);
unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts);
bool UseZero = isAnyZero(Mask);
Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
}
// Narrow shuffle mask to v4x128.
SmallVector<int, 4> ScaledMask;
assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, Mask, ScaledMask);
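// e.g. a 2 x 256-bit lane mask <1,0> scales to the 4 x 128-bit mask
// <2,3,0,1>.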
// Try to lower to vshuf64x2/vshuf32x4.
auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL,
ArrayRef<int> ScaledMask, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
unsigned PermMask = 0;
// Ensure elements came from the same Op.
SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
for (int i = 0; i < 4; ++i) {
assert(ScaledMask[i] >= -1 && "Illegal shuffle sentinel value");
if (ScaledMask[i] < 0)
continue;
SDValue Op = ScaledMask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
else if (Ops[OpIndex] != Op)
return SDValue();
// Convert the 128-bit shuffle mask selection values into 128-bit
// selection bits defined by a vshuf64x2 instruction's immediate control
// byte.
PermMask |= (ScaledMask[i] % 4) << (i * 2);
}
return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
CanonicalizeShuffleInput(ShuffleVT, Ops[0]),
CanonicalizeShuffleInput(ShuffleVT, Ops[1]),
DAG.getTargetConstant(PermMask, DL, MVT::i8));
};
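// For example, ScaledMask <0,1,6,7> takes both low lanes from V1 and both
// high lanes from V2, giving Ops = {V1, V2} and PermMask
// 0xE4 (0<<0 | 1<<2 | 2<<4 | 3<<6).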
// FIXME: Is there a better way to do this? is256BitLaneRepeatedShuffleMask
// doesn't work because our mask is for 128 bits and we don't have an MVT
// to match that.
bool PreferPERMQ = UnaryShuffle && isUndefOrInRange(ScaledMask[0], 0, 2) &&
isUndefOrInRange(ScaledMask[1], 0, 2) &&
isUndefOrInRange(ScaledMask[2], 2, 4) &&
isUndefOrInRange(ScaledMask[3], 2, 4) &&
(ScaledMask[0] < 0 || ScaledMask[2] < 0 ||
ScaledMask[0] == (ScaledMask[2] % 2)) &&
(ScaledMask[1] < 0 || ScaledMask[3] < 0 ||
ScaledMask[1] == (ScaledMask[3] % 2));
if (!isAnyZero(ScaledMask) && !PreferPERMQ) {
if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
return SDValue(); // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
if (SDValue V = MatchSHUF128(ShuffleVT, DL, ScaledMask, V1, V2, DAG))
return DAG.getBitcast(RootVT, V);
}
}
// Handle 128-bit lane shuffles of 256-bit vectors.
if (RootVT.is256BitVector() && NumBaseMaskElts == 2) {
// If the upper half is zeroable, then an extract+insert is more optimal
// than using X86ISD::VPERM2X128. The insertion is free, even if it has to
// zero the upper half.
if (isUndefOrZero(Mask[1])) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
assert(isInRange(Mask[0], 0, 2) && "Unexpected lane shuffle");
Res = CanonicalizeShuffleInput(RootVT, V1);
Res = extract128BitVector(Res, Mask[0] * (NumRootElts / 2), DAG, DL);
return widenSubVector(Res, Mask[1] == SM_SentinelZero, Subtarget, DAG, DL,
256);
}
// If we're inserting the low subvector, an insert-subvector 'concat'
// pattern is quicker than VPERM2X128.
// TODO: Add AVX2 support instead of VPERMQ/VPERMPD.
if (BaseMask[0] == 0 && (BaseMask[1] == 0 || BaseMask[1] == 2) &&
!Subtarget.hasAVX2()) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
SDValue Lo = CanonicalizeShuffleInput(RootVT, V1);
SDValue Hi = CanonicalizeShuffleInput(RootVT, BaseMask[1] == 0 ? V1 : V2);
Hi = extractSubVector(Hi, 0, DAG, DL, 128);
return insertSubVector(Lo, Hi, NumRootElts / 2, DAG, DL, 128);
}
if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
return SDValue(); // Nothing to do!
// If we have AVX2, prefer to use VPERMQ/VPERMPD for unary shuffles unless
// we need to use the zeroing feature.
// Prefer blends for sequential shuffles unless we are optimizing for size.
if (UnaryShuffle &&
!(Subtarget.hasAVX2() && isUndefOrInRange(Mask, 0, 2)) &&
(OptForSize || !isSequentialOrUndefOrZeroInRange(Mask, 0, 2, 0))) {
unsigned PermMask = 0;
PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
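// e.g. Mask <1,-1> encodes as PermMask 0x81: lane 1 of V1 into the low
// half, with the 0x8 bit zeroing the unused high half.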
return DAG.getNode(
X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1),
DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
return SDValue(); // Nothing to do!
// TODO - handle AVX512VL cases with X86ISD::SHUF128.
if (!UnaryShuffle && !IsMaskedShuffle) {
assert(llvm::all_of(Mask, [](int M) { return 0 <= M && M < 4; }) &&
"Unexpected shuffle sentinel value");
// Prefer blends to X86ISD::VPERM2X128.
if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
unsigned PermMask = 0;
PermMask |= ((Mask[0] & 3) << 0);
PermMask |= ((Mask[1] & 3) << 4);
SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
CanonicalizeShuffleInput(RootVT, LHS),
CanonicalizeShuffleInput(RootVT, RHS),
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
}
}
// For masks that have been widened to 128-bit elements or more,
// narrow back down to 64-bit elements.
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
SmallVector<int, 64> ScaledMask;
narrowShuffleMaskElts(MaskScale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
}
// For masked shuffles, we're trying to match the root width for better
// writemask folding; attempt to scale the mask accordingly.
// TODO - variable shuffles might need this to be widened again.
if (IsMaskedShuffle && NumRootElts > Mask.size()) {
assert((NumRootElts % Mask.size()) == 0 && "Illegal mask size");
int MaskScale = NumRootElts / Mask.size();
SmallVector<int, 64> ScaledMask;
narrowShuffleMaskElts(MaskScale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
}
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
// Determine the effective mask value type.
FloatDomain &= (32 <= MaskEltSizeInBits);
MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
: MVT::getIntegerVT(MaskEltSizeInBits);
MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);
// Only allow legal mask types.
if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
return SDValue();
// Attempt to match the mask against known shuffle patterns.
MVT ShuffleSrcVT, ShuffleVT;
unsigned Shuffle, PermuteImm;
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
// TODO: Should we indicate which domain is preferred if both are allowed?
bool AllowFloatDomain = FloatDomain || (Depth >= 3);
bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
// Determine zeroable mask elements.
APInt KnownUndef, KnownZero;
resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero);
APInt Zeroable = KnownUndef | KnownZero;
if (UnaryShuffle) {
// Attempt to match against broadcast-from-vector.
// Limit AVX1 to cases where we're loading+broadcasting a scalar element.
if ((Subtarget.hasAVX2() ||
(Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) &&
(!IsMaskedShuffle || NumRootElts == NumMaskElts)) {
if (isUndefOrEqual(Mask, 0)) {
if (V1.getValueType() == MaskVT &&
V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
X86::mayFoldLoad(V1.getOperand(0), Subtarget)) {
if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = V1.getOperand(0);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
return DAG.getBitcast(RootVT, Res);
}
if (Subtarget.hasAVX2()) {
if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = CanonicalizeShuffleInput(MaskVT, V1);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
return DAG.getBitcast(RootVT, Res);
}
}
}
if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, V1,
DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
return DAG.getBitcast(RootVT, Res);
}
if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT,
PermuteImm) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = CanonicalizeShuffleInput(ShuffleVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
}
// Attempt to combine to INSERTPS, but only if the inserted element has come
// from a scalar.
// TODO: Handle other insertions here as well?
if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
Subtarget.hasSSE41() &&
!isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG)) {
if (MaskEltSizeInBits == 32) {
SDValue SrcV1 = V1, SrcV2 = V2;
if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask,
DAG) &&
SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) {
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
return SDValue(); // Nothing to do!
Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
CanonicalizeShuffleInput(MVT::v4f32, SrcV1),
CanonicalizeShuffleInput(MVT::v4f32, SrcV2),
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
}
if (MaskEltSizeInBits == 64 &&
isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}, DAG) &&
V2.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V2.getScalarValueSizeInBits() <= 32) {
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
return SDValue(); // Nothing to do!
PermuteImm = (/*DstIdx*/ 2 << 4) | (/*SrcIdx*/ 0 << 0);
Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
CanonicalizeShuffleInput(MVT::v4f32, V1),
CanonicalizeShuffleInput(MVT::v4f32, V2),
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
}
SDValue NewV1 = V1; // Save operands in case early exit happens.
SDValue NewV2 = V2;
if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT, UnaryShuffle) &&
(!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1);
NewV2 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV2);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
return DAG.getBitcast(RootVT, Res);
}
NewV1 = V1; // Save operands in case early exit happens.
NewV2 = V2;
if (matchBinaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, NewV1, NewV2, DL, DAG,
Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = CanonicalizeShuffleInput(ShuffleVT, NewV1);
NewV2 = CanonicalizeShuffleInput(ShuffleVT, NewV2);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
// Typically from here on, we need an integer version of MaskVT.
MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits);
IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts);
// Annoyingly, SSE4A instructions don't map into the above match helpers.
if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
uint64_t BitLen, BitIdx;
if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI)
return SDValue(); // Nothing to do!
V1 = CanonicalizeShuffleInput(IntMaskVT, V1);
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI)
return SDValue(); // Nothing to do!
V1 = CanonicalizeShuffleInput(IntMaskVT, V1);
V2 = CanonicalizeShuffleInput(IntMaskVT, V2);
Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
}
// Match shuffle against TRUNCATE patterns.
if (AllowIntDomain && MaskEltSizeInBits < 64 && Subtarget.hasAVX512()) {
// Match against a VTRUNC instruction, accounting for src/dst sizes.
if (matchShuffleAsVTRUNC(ShuffleSrcVT, ShuffleVT, IntMaskVT, Mask, Zeroable,
Subtarget)) {
bool IsTRUNCATE = ShuffleVT.getVectorNumElements() ==
ShuffleSrcVT.getVectorNumElements();
unsigned Opc =
IsTRUNCATE ? (unsigned)ISD::TRUNCATE : (unsigned)X86ISD::VTRUNC;
if (Depth == 0 && Root.getOpcode() == Opc)
return SDValue(); // Nothing to do!
V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
Res = DAG.getNode(Opc, DL, ShuffleVT, V1);
if (ShuffleVT.getSizeInBits() < RootSizeInBits)
Res = widenSubVector(Res, true, Subtarget, DAG, DL, RootSizeInBits);
return DAG.getBitcast(RootVT, Res);
}
// Do we need a more general binary truncation pattern?
if (RootSizeInBits < 512 &&
((RootVT.is256BitVector() && Subtarget.useAVX512Regs()) ||
(RootVT.is128BitVector() && Subtarget.hasVLX())) &&
(MaskEltSizeInBits > 8 || Subtarget.hasBWI()) &&
isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) {
// Bail if this was already a truncation or PACK node.
// We sometimes fail to match PACK if we demand known undef elements.
if (Depth == 0 && (Root.getOpcode() == ISD::TRUNCATE ||
Root.getOpcode() == X86ISD::PACKSS ||
Root.getOpcode() == X86ISD::PACKUS))
return SDValue(); // Nothing to do!
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2);
V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
V2 = CanonicalizeShuffleInput(ShuffleSrcVT, V2);
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts);
Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShuffleSrcVT, V1, V2);
Res = DAG.getNode(ISD::TRUNCATE, DL, IntMaskVT, Res);
return DAG.getBitcast(RootVT, Res);
}
}
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 1)
return SDValue();
// Depth threshold above which we can efficiently use variable mask shuffles.
int VariableCrossLaneShuffleDepth =
Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2;
int VariablePerLaneShuffleDepth =
Subtarget.hasFastVariablePerLaneShuffle() ? 1 : 2;
AllowVariableCrossLaneMask &=
(Depth >= VariableCrossLaneShuffleDepth) || HasVariableMask;
AllowVariablePerLaneMask &=
(Depth >= VariablePerLaneShuffleDepth) || HasVariableMask;
// VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake, so we require a
// higher depth before combining them.
bool AllowBWIVPERMV3 =
(Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask);
bool MaskContainsZeros = isAnyZero(Mask);
if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) {
// If we have a single input lane-crossing shuffle then lower to VPERMV.
if (UnaryShuffle && AllowVariableCrossLaneMask && !MaskContainsZeros) {
if (Subtarget.hasAVX2() &&
(MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) {
SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
Res = CanonicalizeShuffleInput(MaskVT, V1);
Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res);
return DAG.getBitcast(RootVT, Res);
}
// AVX512 variants (non-VLX will pad to 512-bit shuffles).
if ((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
(Subtarget.hasBWI() &&
(MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
(Subtarget.hasVBMI() &&
(MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) {
V1 = CanonicalizeShuffleInput(MaskVT, V1);
V2 = DAG.getUNDEF(MaskVT);
Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
return DAG.getBitcast(RootVT, Res);
}
}
// Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero
// vector as the second source (non-VLX will pad to 512-bit shuffles).
if (UnaryShuffle && AllowVariableCrossLaneMask &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
(Subtarget.hasBWI() && AllowBWIVPERMV3 &&
(MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
(Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
(MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
// Adjust shuffle mask - replace SM_SentinelZero with second source index.
for (unsigned i = 0; i != NumMaskElts; ++i)
if (Mask[i] == SM_SentinelZero)
Mask[i] = NumMaskElts + i;
V1 = CanonicalizeShuffleInput(MaskVT, V1);
V2 = getZeroVector(MaskVT, Subtarget, DAG, DL);
Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
return DAG.getBitcast(RootVT, Res);
}
// If that failed and either input is extracted then try to combine as a
// shuffle with the larger type.
if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
Inputs, Root, BaseMask, Depth, HasVariableMask,
AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG,
Subtarget))
return WideShuffle;
// If we have a dual input lane-crossing shuffle then lower to VPERMV3
// (non-VLX will pad to 512-bit shuffles).
if (AllowVariableCrossLaneMask && !MaskContainsZeros &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 ||
MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
(Subtarget.hasBWI() && AllowBWIVPERMV3 &&
(MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
(Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
(MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
V1 = CanonicalizeShuffleInput(MaskVT, V1);
V2 = CanonicalizeShuffleInput(MaskVT, V2);
Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
return DAG.getBitcast(RootVT, Res);
}
return SDValue();
}
// See if we can combine a single input shuffle with zeros to a bit-mask,
// which is much simpler than any shuffle.
if (UnaryShuffle && MaskContainsZeros && AllowVariablePerLaneMask &&
isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) &&
DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
APInt Zero = APInt::getZero(MaskEltSizeInBits);
APInt AllOnes = APInt::getAllOnes(MaskEltSizeInBits);
APInt UndefElts(NumMaskElts, 0);
SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
UndefElts.setBit(i);
continue;
}
if (M == SM_SentinelZero)
continue;
EltBits[i] = AllOnes;
}
SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL);
Res = CanonicalizeShuffleInput(MaskVT, V1);
unsigned AndOpcode =
MaskVT.isFloatingPoint() ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
return DAG.getBitcast(RootVT, Res);
}
// If we have a single input shuffle with different shuffle patterns in the
// 128-bit lanes, use the variable mask to VPERMILPS.
// TODO: Combine other mask types at higher depths.
if (UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros &&
((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
(MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) {
SmallVector<SDValue, 16> VPermIdx;
for (int M : Mask) {
SDValue Idx =
M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32);
VPermIdx.push_back(Idx);
}
SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx);
Res = CanonicalizeShuffleInput(MaskVT, V1);
Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask);
return DAG.getBitcast(RootVT, Res);
}
// With XOP, binary shuffles of 128/256-bit floating point vectors can combine
// to VPERMIL2PD/VPERMIL2PS.
if (AllowVariablePerLaneMask && Subtarget.hasXOP() &&
(MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
MaskVT == MVT::v8f32)) {
// VPERMIL2 Operation.
// Bits[3] - Match Bit.
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
// Bits[2:0] - (Per Lane) PS Shuffle Mask.
unsigned NumLanes = MaskVT.getSizeInBits() / 128;
unsigned NumEltsPerLane = NumMaskElts / NumLanes;
SmallVector<int, 8> VPerm2Idx;
unsigned M2ZImm = 0;
for (int M : Mask) {
if (M == SM_SentinelUndef) {
VPerm2Idx.push_back(-1);
continue;
}
if (M == SM_SentinelZero) {
M2ZImm = 2;
VPerm2Idx.push_back(8);
continue;
}
int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index);
VPerm2Idx.push_back(Index);
}
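// e.g. for v4f64 (NumEltsPerLane == 2), mask element M == 5 becomes
// Index = ((5 % 2) + (5 / 4) * 2) << 1 == 6.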
V1 = CanonicalizeShuffleInput(MaskVT, V1);
V2 = CanonicalizeShuffleInput(MaskVT, V2);
SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
DAG.getTargetConstant(M2ZImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
// If we have 3 or more shuffle instructions or a chain involving a variable
// mask, we can replace them with a single PSHUFB instruction profitably.
// Intel's manuals suggest only using PSHUFB if doing so replaces 5
// instructions, but in practice PSHUFB tends to be *very* fast so we're
// more aggressive.
if (UnaryShuffle && AllowVariablePerLaneMask &&
((RootVT.is128BitVector() && Subtarget.hasSSSE3()) ||
(RootVT.is256BitVector() && Subtarget.hasAVX2()) ||
(RootVT.is512BitVector() && Subtarget.hasBWI()))) {
SmallVector<SDValue, 16> PSHUFBMask;
int NumBytes = RootVT.getSizeInBits() / 8;
int Ratio = NumBytes / NumMaskElts;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Ratio];
if (M == SM_SentinelUndef) {
PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
}
if (M == SM_SentinelZero) {
PSHUFBMask.push_back(DAG.getConstant(0x80, DL, MVT::i8));
continue;
}
M = Ratio * M + i % Ratio;
assert((M / 16) == (i / 16) && "Lane crossing detected");
PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
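// e.g. a v4i32 mask <1,0,3,2> (Ratio == 4) expands to the byte mask
// <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11>.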
MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
Res = CanonicalizeShuffleInput(ByteVT, V1);
SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask);
Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp);
return DAG.getBitcast(RootVT, Res);
}
// With XOP, if we have a 128-bit binary input shuffle we can always combine
// to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never
// slower than PSHUFB on targets that support both.
if (AllowVariablePerLaneMask && RootVT.is128BitVector() &&
Subtarget.hasXOP()) {
// VPPERM Mask Operation
// Bits[4:0] - Byte Index (0 - 31)
// Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO)
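// e.g. the 0x80 constant used for zeroable bytes below sets Bits[7:5] to 4
// (ZERO) and leaves the byte index as 0.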
SmallVector<SDValue, 16> VPPERMMask;
int NumBytes = 16;
int Ratio = NumBytes / NumMaskElts;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Ratio];
if (M == SM_SentinelUndef) {
VPPERMMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
}
if (M == SM_SentinelZero) {
VPPERMMask.push_back(DAG.getConstant(0x80, DL, MVT::i8));
continue;
}
M = Ratio * M + i % Ratio;
VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::v16i8;
V1 = CanonicalizeShuffleInput(ByteVT, V1);
V2 = CanonicalizeShuffleInput(ByteVT, V2);
SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
return DAG.getBitcast(RootVT, Res);
}
// If that failed and either input is extracted then try to combine as a
// shuffle with the larger type.
if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
Inputs, Root, BaseMask, Depth, HasVariableMask,
AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget))
return WideShuffle;
// If we have a dual input shuffle then lower to VPERMV3
// (non-VLX will pad to 512-bit shuffles).
if (!UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 ||
MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 ||
MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 ||
MaskVT == MVT::v16i32)) ||
(Subtarget.hasBWI() && AllowBWIVPERMV3 &&
(MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 ||
MaskVT == MVT::v32i16)) ||
(Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
(MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 ||
MaskVT == MVT::v64i8)))) {
V1 = CanonicalizeShuffleInput(MaskVT, V1);
V2 = CanonicalizeShuffleInput(MaskVT, V2);
Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
return DAG.getBitcast(RootVT, Res);
}
// Failed to find any combines.
return SDValue();
}
// Combine an arbitrary chain of shuffles + extract_subvectors into a single
// instruction if possible.
//
// Wrapper for combineX86ShuffleChain that extends the shuffle mask to a larger
// type size to attempt to combine:
// shuffle(extract_subvector(x,c1),extract_subvector(y,c2),m1)
// -->
// extract_subvector(shuffle(x,y,m2),0)
static SDValue combineX86ShuffleChainWithExtract(
ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
bool HasVariableMask, bool AllowVariableCrossLaneMask,
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned NumMaskElts = BaseMask.size();
unsigned NumInputs = Inputs.size();
if (NumInputs == 0)
return SDValue();
EVT RootVT = Root.getValueType();
unsigned RootSizeInBits = RootVT.getSizeInBits();
assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");
// Bail if we have any smaller inputs.
if (llvm::any_of(Inputs, [RootSizeInBits](SDValue Input) {
return Input.getValueSizeInBits() < RootSizeInBits;
}))
return SDValue();
SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
SmallVector<unsigned, 4> Offsets(NumInputs, 0);
// Peek through subvectors.
// TODO: Support inter-mixed EXTRACT_SUBVECTORs + BITCASTs?
unsigned WideSizeInBits = RootSizeInBits;
for (unsigned i = 0; i != NumInputs; ++i) {
SDValue &Src = WideInputs[i];
unsigned &Offset = Offsets[i];
Src = peekThroughBitcasts(Src);
EVT BaseVT = Src.getValueType();
while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
Offset += Src.getConstantOperandVal(1);
Src = Src.getOperand(0);
}
WideSizeInBits = std::max(WideSizeInBits,
(unsigned)Src.getValueSizeInBits());
assert((Offset % BaseVT.getVectorNumElements()) == 0 &&
"Unexpected subvector extraction");
Offset /= BaseVT.getVectorNumElements();
Offset *= NumMaskElts;
}
// Bail if we're always extracting from the lowest subvectors;
// combineX86ShuffleChain should match this for the current width.
if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
return SDValue();
unsigned Scale = WideSizeInBits / RootSizeInBits;
assert((WideSizeInBits % RootSizeInBits) == 0 &&
"Unexpected subvector extraction");
// If the src vector types aren't the same, see if we can extend
// them to match each other.
// TODO: Support different scalar types?
EVT WideSVT = WideInputs[0].getValueType().getScalarType();
if (llvm::any_of(WideInputs, [&WideSVT, &DAG](SDValue Op) {
return !DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()) ||
Op.getValueType().getScalarType() != WideSVT;
}))
return SDValue();
// Create new mask for larger type.
for (unsigned i = 1; i != NumInputs; ++i)
Offsets[i] += i * Scale * NumMaskElts;
SmallVector<int, 64> WideMask(BaseMask);
for (int &M : WideMask) {
if (M < 0)
continue;
M = (M % NumMaskElts) + Offsets[M / NumMaskElts];
}
WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
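// Illustrative example: a BaseMask <0,2> whose two 128-bit inputs were each
// extracted from the upper half of a 256-bit source gives Scale == 2 and
// Offsets {2,6}, so WideMask becomes <2,6,-1,-1> over the full sources.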
// Remove unused/repeated shuffle source ops.
resolveTargetShuffleInputsAndMask(WideInputs, WideMask);
assert(!WideInputs.empty() && "Shuffle with no inputs detected");
if (WideInputs.size() > 2)
return SDValue();
// Increase depth for every upper subvector we've peeked through.
Depth += count_if(Offsets, [](unsigned Offset) { return Offset > 0; });
// Attempt to combine wider chain.
// TODO: Can we use a better Root?
SDValue WideRoot = WideInputs.front().getValueSizeInBits() >
WideInputs.back().getValueSizeInBits()
? WideInputs.front()
: WideInputs.back();
if (SDValue WideShuffle =
combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
HasVariableMask, AllowVariableCrossLaneMask,
AllowVariablePerLaneMask, DAG, Subtarget)) {
WideShuffle =
extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
return DAG.getBitcast(RootVT, WideShuffle);
}
return SDValue();
}
// Canonicalize the combined shuffle mask chain with horizontal ops.
// NOTE: This may update the Ops and Mask.
static SDValue canonicalizeShuffleMaskWithHorizOp(
MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Mask.empty() || Ops.empty())
return SDValue();
SmallVector<SDValue> BC;
for (SDValue Op : Ops)
BC.push_back(peekThroughBitcasts(Op));
// All ops must be the same horizop + type.
SDValue BC0 = BC[0];
EVT VT0 = BC0.getValueType();
unsigned Opcode0 = BC0.getOpcode();
if (VT0.getSizeInBits() != RootSizeInBits || llvm::any_of(BC, [&](SDValue V) {
return V.getOpcode() != Opcode0 || V.getValueType() != VT0;
}))
return SDValue();
bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
if (!isHoriz && !isPack)
return SDValue();
// Do all ops have a single use?
bool OneUseOps = llvm::all_of(Ops, [](SDValue Op) {
return Op.hasOneUse() &&
peekThroughBitcasts(Op) == peekThroughOneUseBitcasts(Op);
});
int NumElts = VT0.getVectorNumElements();
int NumLanes = VT0.getSizeInBits() / 128;
int NumEltsPerLane = NumElts / NumLanes;
int NumHalfEltsPerLane = NumEltsPerLane / 2;
MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
unsigned EltSizeInBits = RootSizeInBits / Mask.size();
if (NumEltsPerLane >= 4 &&
(isPack || shouldUseHorizontalOp(Ops.size() == 1, DAG, Subtarget))) {
SmallVector<int> LaneMask, ScaledMask;
if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, LaneMask) &&
scaleShuffleElements(LaneMask, 4, ScaledMask)) {
// See if we can remove the shuffle by re-sorting the HOP chain so that
// the HOP args are pre-shuffled.
// TODO: Generalize to any sized/depth chain.
// TODO: Add support for PACKSS/PACKUS.
if (isHoriz) {
// Attempt to find a HOP(HOP(X,Y),HOP(Z,W)) source operand.
auto GetHOpSrc = [&](int M) {
if (M == SM_SentinelUndef)
return DAG.getUNDEF(VT0);
if (M == SM_SentinelZero)
return getZeroVector(VT0.getSimpleVT(), Subtarget, DAG, DL);
SDValue Src0 = BC[M / 4];
SDValue Src1 = Src0.getOperand((M % 4) >= 2);
if (Src1.getOpcode() == Opcode0 && Src0->isOnlyUserOf(Src1.getNode()))
return Src1.getOperand(M % 2);
return SDValue();
};
SDValue M0 = GetHOpSrc(ScaledMask[0]);
SDValue M1 = GetHOpSrc(ScaledMask[1]);
SDValue M2 = GetHOpSrc(ScaledMask[2]);
SDValue M3 = GetHOpSrc(ScaledMask[3]);
if (M0 && M1 && M2 && M3) {
SDValue LHS = DAG.getNode(Opcode0, DL, SrcVT, M0, M1);
SDValue RHS = DAG.getNode(Opcode0, DL, SrcVT, M2, M3);
return DAG.getNode(Opcode0, DL, VT0, LHS, RHS);
}
}
// shuffle(hop(x,y),hop(z,w)) -> permute(hop(x,z)) etc.
if (Ops.size() >= 2) {
SDValue LHS, RHS;
auto GetHOpSrc = [&](int M, int &OutM) {
// TODO: Support SM_SentinelZero
if (M < 0)
return M == SM_SentinelUndef;
SDValue Src = BC[M / 4].getOperand((M % 4) >= 2);
if (!LHS || LHS == Src) {
LHS = Src;
OutM = (M % 2);
return true;
}
if (!RHS || RHS == Src) {
RHS = Src;
OutM = (M % 2) + 2;
return true;
}
return false;
};
int PostMask[4] = {-1, -1, -1, -1};
if (GetHOpSrc(ScaledMask[0], PostMask[0]) &&
GetHOpSrc(ScaledMask[1], PostMask[1]) &&
GetHOpSrc(ScaledMask[2], PostMask[2]) &&
GetHOpSrc(ScaledMask[3], PostMask[3])) {
LHS = DAG.getBitcast(SrcVT, LHS);
RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS);
SDValue Res = DAG.getNode(Opcode0, DL, VT0, LHS, RHS);
// Use SHUFPS for the permute so this will work on SSE3 targets,
// shuffle combining and domain handling will simplify this later on.
MVT ShuffleVT = MVT::getVectorVT(MVT::f32, RootSizeInBits / 32);
Res = DAG.getBitcast(ShuffleVT, Res);
return DAG.getNode(X86ISD::SHUFP, DL, ShuffleVT, Res, Res,
getV4X86ShuffleImm8ForMask(PostMask, DL, DAG));
}
}
}
}
if (2 < Ops.size())
return SDValue();
SDValue BC1 = BC[BC.size() - 1];
if (Mask.size() == VT0.getVectorNumElements()) {
// Canonicalize binary shuffles of horizontal ops that use the
// same sources to a unary shuffle.
// TODO: Try to perform this fold even if the shuffle remains.
if (Ops.size() == 2) {
auto ContainsOps = [](SDValue HOp, SDValue Op) {
return Op == HOp.getOperand(0) || Op == HOp.getOperand(1);
};
// Commute if all BC0's ops are contained in BC1.
if (ContainsOps(BC1, BC0.getOperand(0)) &&
ContainsOps(BC1, BC0.getOperand(1))) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(Ops[0], Ops[1]);
std::swap(BC0, BC1);
}
// If BC1 can be represented by BC0, then convert to unary shuffle.
if (ContainsOps(BC0, BC1.getOperand(0)) &&
ContainsOps(BC0, BC1.getOperand(1))) {
for (int &M : Mask) {
if (M < NumElts) // BC0 element or UNDEF/Zero sentinel.
continue;
int SubLane = ((M % NumEltsPerLane) >= NumHalfEltsPerLane) ? 1 : 0;
M -= NumElts + (SubLane * NumHalfEltsPerLane);
if (BC1.getOperand(SubLane) != BC0.getOperand(0))
M += NumHalfEltsPerLane;
}
}
}
// Canonicalize unary horizontal ops to only refer to lower halves.
for (int i = 0; i != NumElts; ++i) {
int &M = Mask[i];
if (isUndefOrZero(M))
continue;
if (M < NumElts && BC0.getOperand(0) == BC0.getOperand(1) &&
(M % NumEltsPerLane) >= NumHalfEltsPerLane)
M -= NumHalfEltsPerLane;
if (NumElts <= M && BC1.getOperand(0) == BC1.getOperand(1) &&
(M % NumEltsPerLane) >= NumHalfEltsPerLane)
M -= NumHalfEltsPerLane;
}
}
// Combine a binary shuffle of 2 similar 'Horizontal' instructions into a
// single instruction. Attempt to match a v2X64 repeating shuffle pattern that
// represents the LHS/RHS inputs for the lower/upper halves.
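// e.g. shuffle(HADD(X,Y),HADD(Z,W)) with WideMask128 <0,3> folds to
// HADD(X,W).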
SmallVector<int, 16> TargetMask128, WideMask128;
if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, TargetMask128) &&
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
bool SingleOp = (Ops.size() == 1);
if (isPack || OneUseOps ||
shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
Lo = Lo.getOperand(WideMask128[0] & 1);
Hi = Hi.getOperand(WideMask128[1] & 1);
if (SingleOp) {
SDValue Undef = DAG.getUNDEF(SrcVT);
SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
}
return DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
}
}
return SDValue();
}
// Attempt to constant fold all of the constant source ops.
// Returns true if the entire shuffle is folded to a constant.
// TODO: Extend this to merge multiple constant Ops and update the mask.
static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
ArrayRef<int> Mask, SDValue Root,
bool HasVariableMask,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Root.getSimpleValueType();
unsigned SizeInBits = VT.getSizeInBits();
unsigned NumMaskElts = Mask.size();
unsigned MaskSizeInBits = SizeInBits / NumMaskElts;
unsigned NumOps = Ops.size();
// Extract constant bits from each source op.
bool OneUseConstantOp = false;
SmallVector<APInt, 16> UndefEltsOps(NumOps);
SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
for (unsigned i = 0; i != NumOps; ++i) {
SDValue SrcOp = Ops[i];
OneUseConstantOp |= SrcOp.hasOneUse();
if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i],
RawBitsOps[i]))
return SDValue();
}
// If we're optimizing for size, only fold if at least one of the constants
// is only used once or the combined shuffle has included a variable mask
// shuffle; this is to avoid constant pool bloat.
bool IsOptimizingSize = DAG.shouldOptForSize();
if (IsOptimizingSize && !OneUseConstantOp && !HasVariableMask)
return SDValue();
// Shuffle the constant bits according to the mask.
SDLoc DL(Root);
APInt UndefElts(NumMaskElts, 0);
APInt ZeroElts(NumMaskElts, 0);
APInt ConstantElts(NumMaskElts, 0);
SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
APInt::getZero(MaskSizeInBits));
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
UndefElts.setBit(i);
continue;
} else if (M == SM_SentinelZero) {
ZeroElts.setBit(i);
continue;
}
assert(0 <= M && M < (int)(NumMaskElts * NumOps));
unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;
auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
if (SrcUndefElts[SrcMaskIdx]) {
UndefElts.setBit(i);
continue;
}
auto &SrcEltBits = RawBitsOps[SrcOpIdx];
APInt &Bits = SrcEltBits[SrcMaskIdx];
if (!Bits) {
ZeroElts.setBit(i);
continue;
}
ConstantElts.setBit(i);
ConstantBitData[i] = Bits;
}
assert((UndefElts | ZeroElts | ConstantElts).isAllOnes());
// Attempt to create a zero vector.
if ((UndefElts | ZeroElts).isAllOnes())
return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);
// Create the constant data.
MVT MaskSVT;
if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
MaskSVT = MVT::getFloatingPointVT(MaskSizeInBits);
else
MaskSVT = MVT::getIntegerVT(MaskSizeInBits);
MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
return SDValue();
SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
return DAG.getBitcast(VT, CstOp);
}
namespace llvm {
namespace X86 {
enum {
MaxShuffleCombineDepth = 8
};
}
} // namespace llvm
/// Fully generic combining of x86 shuffle instructions.
///
/// This should be the last combine run over the x86 shuffle instructions. Once
/// they have been fully optimized, this will recursively consider all chains
/// of single-use shuffle instructions, build a generic model of the cumulative
/// shuffle operation, and check for simpler instructions which implement this
/// operation. We use this primarily for two purposes:
///
/// 1) Collapse generic shuffles to specialized single instructions when
/// equivalent. In most cases, this is just an encoding size win, but
/// sometimes we will collapse multiple generic shuffles into a single
/// special-purpose shuffle.
/// 2) Look for sequences of shuffle instructions with 3 or more total
/// instructions, and replace them with the slightly more expensive SSSE3
/// PSHUFB instruction if available. We do this as the last combining step
/// to ensure we avoid using PSHUFB if we can implement the shuffle with
/// a suitable short sequence of other instructions. The PSHUFB will either
/// use a register or have to read from memory and so is slightly (but only
/// slightly) more expensive than the other shuffle instructions.
///
/// Because this is inherently a quadratic operation (for each shuffle in
/// a chain, we recurse up the chain), the depth is limited to 8 instructions.
/// This should never be an issue in practice as the shuffle lowering doesn't
/// produce sequences of more than 8 instructions.
///
/// FIXME: We will currently miss some cases where the redundant shuffling
/// would simplify under the threshold for PSHUFB formation because of
/// combine-ordering. To fix this, we should do the redundant instruction
/// combining in this recursive walk.
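/// For instance, a pair of stacked PSHUFD nodes whose masks compose to the
/// identity permutation is elided entirely by this walk.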
static SDValue combineX86ShufflesRecursively(
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask,
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(RootMask.size() > 0 &&
(RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
"Illegal shuffle root mask");
MVT RootVT = Root.getSimpleValueType();
assert(RootVT.isVector() && "Shuffles operate on vector types!");
unsigned RootSizeInBits = RootVT.getSizeInBits();
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
if (Depth >= MaxDepth)
return SDValue();
// Directly rip through bitcasts to find the underlying operand.
SDValue Op = SrcOps[SrcOpIndex];
Op = peekThroughOneUseBitcasts(Op);
EVT VT = Op.getValueType();
if (!VT.isVector() || !VT.isSimple())
return SDValue(); // Bail if we hit a non-simple non-vector.
// FIXME: Just bail on f16 for now.
if (VT.getVectorElementType() == MVT::f16)
return SDValue();
assert((RootSizeInBits % VT.getSizeInBits()) == 0 &&
"Can only combine shuffles upto size of the root op.");
// Create a demanded elts mask from the referenced elements of Op.
APInt OpDemandedElts = APInt::getZero(RootMask.size());
for (int M : RootMask) {
int BaseIdx = RootMask.size() * SrcOpIndex;
if (isInRange(M, BaseIdx, BaseIdx + RootMask.size()))
OpDemandedElts.setBit(M - BaseIdx);
}
if (RootSizeInBits != VT.getSizeInBits()) {
// Op is smaller than Root - extract the demanded elts for the subvector.
unsigned Scale = RootSizeInBits / VT.getSizeInBits();
unsigned NumOpMaskElts = RootMask.size() / Scale;
assert((RootMask.size() % Scale) == 0 && "Root mask size mismatch");
assert(OpDemandedElts
.extractBits(RootMask.size() - NumOpMaskElts, NumOpMaskElts)
.isZero() &&
"Out of range elements referenced in root mask");
OpDemandedElts = OpDemandedElts.extractBits(NumOpMaskElts, 0);
}
OpDemandedElts =
APIntOps::ScaleBitMask(OpDemandedElts, VT.getVectorNumElements());
// Extract target shuffle mask and resolve sentinels and inputs.
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
APInt OpUndef, OpZero;
bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
if (getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
OpZero, DAG, Depth, false)) {
// Shuffle inputs must not be larger than the shuffle result.
// TODO: Relax this for single input faux shuffles (e.g. trunc).
if (llvm::any_of(OpInputs, [VT](SDValue OpInput) {
return OpInput.getValueSizeInBits() > VT.getSizeInBits();
}))
return SDValue();
} else if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
(RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 &&
!isNullConstant(Op.getOperand(1))) {
SDValue SrcVec = Op.getOperand(0);
int ExtractIdx = Op.getConstantOperandVal(1);
unsigned NumElts = VT.getVectorNumElements();
OpInputs.assign({SrcVec});
OpMask.assign(NumElts, SM_SentinelUndef);
std::iota(OpMask.begin(), OpMask.end(), ExtractIdx);
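// e.g. extracting v4i32 at element index 4 from a wider source yields
// OpMask <4,5,6,7>.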
OpZero = OpUndef = APInt::getZero(NumElts);
} else {
return SDValue();
}
// If the shuffle result was smaller than the root, we need to adjust the
// mask indices and pad the mask with undefs.
if (RootSizeInBits > VT.getSizeInBits()) {
unsigned NumSubVecs = RootSizeInBits / VT.getSizeInBits();
unsigned OpMaskSize = OpMask.size();
if (OpInputs.size() > 1) {
unsigned PaddedMaskSize = NumSubVecs * OpMaskSize;
for (int &M : OpMask) {
if (M < 0)
continue;
int EltIdx = M % OpMaskSize;
int OpIdx = M / OpMaskSize;
M = (PaddedMaskSize * OpIdx) + EltIdx;
}
}
OpZero = OpZero.zext(NumSubVecs * OpMaskSize);
OpUndef = OpUndef.zext(NumSubVecs * OpMaskSize);
OpMask.append((NumSubVecs - 1) * OpMaskSize, SM_SentinelUndef);
}
SmallVector<int, 64> Mask;
SmallVector<SDValue, 16> Ops;
// We don't need to merge masks if the root is empty.
bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1);
if (EmptyRoot) {
// Only resolve zeros if it will remove an input; otherwise we might end
// up in an infinite loop.
bool ResolveKnownZeros = true;
if (!OpZero.isZero()) {
APInt UsedInputs = APInt::getZero(OpInputs.size());
for (int i = 0, e = OpMask.size(); i != e; ++i) {
int M = OpMask[i];
if (OpUndef[i] || OpZero[i] || isUndefOrZero(M))
continue;
UsedInputs.setBit(M / OpMask.size());
if (UsedInputs.isAllOnes()) {
ResolveKnownZeros = false;
break;
}
}
}
resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero,
ResolveKnownZeros);
Mask = OpMask;
Ops.append(OpInputs.begin(), OpInputs.end());
} else {
resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero);
// Add the inputs to the Ops list, avoiding duplicates.
Ops.append(SrcOps.begin(), SrcOps.end());
auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
// Attempt to find an existing match.
SDValue InputBC = peekThroughBitcasts(Input);
for (int i = 0, e = Ops.size(); i < e; ++i)
if (InputBC == peekThroughBitcasts(Ops[i]))
return i;
// Match failed - should we replace an existing Op?
if (InsertionPoint >= 0) {
Ops[InsertionPoint] = Input;
return InsertionPoint;
}
// Add to the end of the Ops list.
Ops.push_back(Input);
return Ops.size() - 1;
};
SmallVector<int, 2> OpInputIdx;
for (SDValue OpInput : OpInputs)
OpInputIdx.push_back(
AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
(OpMask.size() > RootMask.size() &&
OpMask.size() % RootMask.size() == 0) ||
OpMask.size() == RootMask.size()) &&
"The smaller number of elements must divide the larger.");
// This function can be performance-critical, so we rely on the power-of-2
// knowledge that we have about the mask sizes to replace div/rem ops with
// bit-masks and shifts.
assert(isPowerOf2_32(RootMask.size()) &&
"Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());
unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
unsigned RootRatio =
std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
assert((RootRatio == 1 || OpRatio == 1) &&
"Must not have a ratio for both incoming and op masks!");
assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
unsigned OpRatioLog2 = countTrailingZeros(OpRatio);
Mask.resize(MaskWidth, SM_SentinelUndef);
// Merge this shuffle operation's mask into our accumulated mask. Note that
// this shuffle's mask will be the first applied to the input, followed by
// the root mask to get us all the way to the root value arrangement. The
// reason for this order is that we are recursing up the operation chain.
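// Illustrative merge: RootMask <0,2,1,3> (4 elts) over OpMask <1,0> (2 wide
// elts) produces Mask <2,0,3,1> - the op's half-swap is applied first, then
// the root's interleave.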
for (unsigned i = 0; i < MaskWidth; ++i) {
unsigned RootIdx = i >> RootRatioLog2;
if (RootMask[RootIdx] < 0) {
// This is a zero or undef lane, we're done.
Mask[i] = RootMask[RootIdx];
continue;
}
unsigned RootMaskedIdx =
RootRatio == 1
? RootMask[RootIdx]
: (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
// Just insert the scaled root mask value if it references an input other
// than the SrcOp we're currently inserting.
if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
(((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
Mask[i] = RootMaskedIdx;
continue;
}
RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
if (OpMask[OpIdx] < 0) {
// The incoming lanes are zero or undef, it doesn't matter which ones we
// are using.
Mask[i] = OpMask[OpIdx];
continue;
}
// Ok, we have non-zero lanes, map them through to one of the Op's inputs.
unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx]
: (OpMask[OpIdx] << OpRatioLog2) +
(RootMaskedIdx & (OpRatio - 1));
OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input");
OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth;
Mask[i] = OpMaskedIdx;
}
}
// Remove unused/repeated shuffle source ops.
resolveTargetShuffleInputsAndMask(Ops, Mask);
// Handle the all undef/zero/ones cases early.
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
return DAG.getUNDEF(RootVT);
if (all_of(Mask, [](int Idx) { return Idx < 0; }))
return getZeroVector(RootVT, Subtarget, DAG, SDLoc(Root));
if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) &&
!llvm::is_contained(Mask, SM_SentinelZero))
return getOnesVector(RootVT, DAG, SDLoc(Root));
assert(!Ops.empty() && "Shuffle with no inputs detected");
HasVariableMask |= IsOpVariableMask;
// Update the list of shuffle nodes that have been combined so far.
SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(),
SrcNodes.end());
CombinedNodes.push_back(Op.getNode());
// See if we can recurse into each shuffle source op (if it's a target
// shuffle). The source op should only be generally combined if it either has
// a single use (i.e. current Op) or all its users have already been combined;
// if not, we can still combine but should prevent generation of variable
// shuffles to avoid constant pool bloat.
// Don't recurse if we already have more source ops than we can combine in
// the remaining recursion depth.
if (Ops.size() < (MaxDepth - Depth)) {
for (int i = 0, e = Ops.size(); i < e; ++i) {
// For empty roots, we need to resolve zeroable elements before combining
// them with other shuffles.
SmallVector<int, 64> ResolvedMask = Mask;
if (EmptyRoot)
resolveTargetShuffleFromZeroables(ResolvedMask, OpUndef, OpZero);
bool AllowCrossLaneVar = false;
bool AllowPerLaneVar = false;
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) {
AllowCrossLaneVar = AllowVariableCrossLaneMask;
AllowPerLaneVar = AllowVariablePerLaneMask;
}
if (SDValue Res = combineX86ShufflesRecursively(
Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG,
Subtarget))
return Res;
}
}
// Attempt to constant fold all of the constant source ops.
if (SDValue Cst = combineX86ShufflesConstants(
Ops, Mask, Root, HasVariableMask, DAG, Subtarget))
return Cst;
// If constant fold failed and we only have constants - then we have
// multiple uses by a single non-variable shuffle - just bail.
if (Depth == 0 && llvm::all_of(Ops, [&](SDValue Op) {
APInt UndefElts;
SmallVector<APInt> RawBits;
unsigned EltSizeInBits = RootSizeInBits / Mask.size();
return getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
RawBits);
})) {
return SDValue();
}
// Canonicalize the combined shuffle mask chain with horizontal ops.
// NOTE: This will update the Ops and Mask.
if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
return DAG.getBitcast(RootVT, HOp);
// Try to refine our inputs given our knowledge of target shuffle mask.
for (auto I : enumerate(Ops)) {
int OpIdx = I.index();
SDValue &Op = I.value();
// What range of shuffle mask element values results in picking from Op?
int Lo = OpIdx * Mask.size();
int Hi = Lo + Mask.size();
// Which elements of Op do we demand, given the mask's granularity?
APInt OpDemandedElts(Mask.size(), 0);
for (int MaskElt : Mask) {
if (isInRange(MaskElt, Lo, Hi)) { // Picks from Op?
int OpEltIdx = MaskElt - Lo;
OpDemandedElts.setBit(OpEltIdx);
}
}
// Is the shuffle result smaller than the root?
if (Op.getValueSizeInBits() < RootSizeInBits) {
// We padded the mask with undefs. But we now need to undo that.
unsigned NumExpectedVectorElts = Mask.size();
unsigned EltSizeInBits = RootSizeInBits / NumExpectedVectorElts;
unsigned NumOpVectorElts = Op.getValueSizeInBits() / EltSizeInBits;
assert(!OpDemandedElts.extractBits(
NumExpectedVectorElts - NumOpVectorElts, NumOpVectorElts) &&
"Demanding the virtual undef widening padding?");
OpDemandedElts = OpDemandedElts.trunc(NumOpVectorElts); // NUW
}
// The Op itself may be of different VT, so we need to scale the mask.
unsigned NumOpElts = Op.getValueType().getVectorNumElements();
APInt OpScaledDemandedElts = APIntOps::ScaleBitMask(OpDemandedElts, NumOpElts);
// Can this operand be simplified any further, given its demanded elements?
if (SDValue NewOp =
DAG.getTargetLoweringInfo().SimplifyMultipleUseDemandedVectorElts(
Op, OpScaledDemandedElts, DAG))
Op = NewOp;
}
// FIXME: should we rerun resolveTargetShuffleInputsAndMask() now?
// Widen any subvector shuffle inputs we've collected.
// TODO: Remove this to avoid generating temporary nodes, we should only
// widen once combineX86ShuffleChain has found a match.
if (any_of(Ops, [RootSizeInBits](SDValue Op) {
return Op.getValueSizeInBits() < RootSizeInBits;
})) {
for (SDValue &Op : Ops)
if (Op.getValueSizeInBits() < RootSizeInBits)
Op = widenSubVector(Op, false, Subtarget, DAG, SDLoc(Op),
RootSizeInBits);
// Reresolve - we might have repeated subvector sources.
resolveTargetShuffleInputsAndMask(Ops, Mask);
}
// We can only combine unary and binary shuffle mask cases.
if (Ops.size() <= 2) {
// Minor canonicalization of the accumulated shuffle mask to make it easier
// to match below. All this does is detect masks with sequential pairs of
// elements, and shrink them to the half-width mask. It does this in a loop
// so it will reduce the size of the mask to the minimal width mask which
// performs an equivalent shuffle.
while (Mask.size() > 1) {
SmallVector<int, 64> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
break;
Mask = std::move(WidenedMask);
}
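// e.g. a sequential mask <0,1,2,3> widens to <0,1> and then to <0>.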
// Canonicalization of binary shuffle masks to improve pattern matching by
// commuting the inputs.
if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(Ops[0], Ops[1]);
}
// Try to combine into a single shuffle instruction.
if (SDValue Shuffle = combineX86ShuffleChain(
Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
AllowVariablePerLaneMask, DAG, Subtarget))
return Shuffle;
// If all the operands come from the same larger vector, fall through and try
// to use combineX86ShuffleChainWithExtract.
SDValue LHS = peekThroughBitcasts(Ops.front());
SDValue RHS = peekThroughBitcasts(Ops.back());
if (Ops.size() != 2 || !Subtarget.hasAVX2() || RootSizeInBits != 128 ||
(RootSizeInBits / Mask.size()) != 64 ||
LHS.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
RHS.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
LHS.getOperand(0) != RHS.getOperand(0))
return SDValue();
}
// If that failed and any input is extracted then try to combine as a
// shuffle with the larger type.
return combineX86ShuffleChainWithExtract(
Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
AllowVariablePerLaneMask, DAG, Subtarget);
}
/// Helper entry wrapper to combineX86ShufflesRecursively.
static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
return combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 0, X86::MaxShuffleCombineDepth,
/*HasVarMask*/ false,
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, DAG,
Subtarget);
}
/// Get the PSHUF-style mask from PSHUF node.
///
/// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
/// PSHUF-style masks that can be reused with such instructions.
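/// e.g. for a PSHUFHW with mask <0,1,2,3,7,6,5,4> this returns <3,2,1,0>.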
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
SmallVector<SDValue, 2> Ops;
bool HaveMask =
getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask);
(void)HaveMask;
assert(HaveMask);
// If we have more than 128 bits, only the low 128 bits of the shuffle mask
// matter. Check that the upper masks are repeats and remove them.
if (VT.getSizeInBits() > 128) {
int LaneElts = 128 / VT.getScalarSizeInBits();
#ifndef NDEBUG
for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
for (int j = 0; j < LaneElts; ++j)
assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
"Mask doesn't repeat in high 128-bit lanes!");
#endif
Mask.resize(LaneElts);
}
switch (N.getOpcode()) {
case X86ISD::PSHUFD:
return Mask;
case X86ISD::PSHUFLW:
Mask.resize(4);
return Mask;
case X86ISD::PSHUFHW:
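// Only the high 4 words are shuffled (the low half is an identity), so drop
// the low mask elements and rebase the remaining indices to 0..3.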
Mask.erase(Mask.begin(), Mask.begin() + 4);
for (int &M : Mask)
M -= 4;
return Mask;
default:
llvm_unreachable("No valid shuffle instruction found!");
}
}
/// Search for a combinable shuffle across a chain ending in pshufd.
///
/// We walk up the chain and look for a combinable shuffle, skipping over
/// shuffles that we could hoist this shuffle's transformation past without
/// altering anything.
static SDValue
combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(N.getOpcode() == X86ISD::PSHUFD &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
// Walk up a single-use chain looking for a combinable shuffle. Keep a stack
// of the shuffles in the chain so that we can form a fresh chain to replace
// this one.
SmallVector<SDValue, 8> Chain;
SDValue V = N.getOperand(0);
for (; V.hasOneUse(); V = V.getOperand(0)) {
switch (V.getOpcode()) {
default:
return SDValue(); // Nothing combined!
case ISD::BITCAST:
// Skip bitcasts as we always know the type for the target specific
// instructions.
continue;
case X86ISD::PSHUFD:
// Found another dword shuffle.
break;
case X86ISD::PSHUFLW:
// Check that the low words (being shuffled) are the identity in the
// dword shuffle, and the high words are self-contained.
if (Mask[0] != 0 || Mask[1] != 1 ||
!(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
return SDValue();
Chain.push_back(V);
continue;
case X86ISD::PSHUFHW:
// Check that the high words (being shuffled) are the identity in the
// dword shuffle, and the low words are self-contained.
if (Mask[2] != 2 || Mask[3] != 3 ||
!(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
return SDValue();
Chain.push_back(V);
continue;
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
V.getSimpleValueType().getVectorElementType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
unsigned CombineOp =
V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
if (V.getOperand(0) != V.getOperand(1) ||
!V->isOnlyUserOf(V.getOperand(0).getNode()))
return SDValue();
Chain.push_back(V);
V = V.getOperand(0);
do {
switch (V.getOpcode()) {
default:
return SDValue(); // Nothing to combine.
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
if (V.getOpcode() == CombineOp)
break;
Chain.push_back(V);
[[fallthrough]];
case ISD::BITCAST:
V = V.getOperand(0);
continue;
}
break;
} while (V.hasOneUse());
break;
}
// Break out of the loop if we break out of the switch.
break;
}
if (!V.hasOneUse())
// We fell out of the loop without finding a viable combining instruction.
return SDValue();
// Merge this node's mask and our incoming mask.
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
for (int &M : Mask)
M = VMask[M];
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Rebuild the chain around this new shuffle.
while (!Chain.empty()) {
SDValue W = Chain.pop_back_val();
if (V.getValueType() != W.getOperand(0).getValueType())
V = DAG.getBitcast(W.getOperand(0).getValueType(), V);
switch (W.getOpcode()) {
default:
llvm_unreachable("Only PSHUF and UNPCK instructions get here!");
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
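// The original unpack had identical operands (checked above), so feed the
// new value into both.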
V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
break;
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
break;
}
}
if (V.getValueType() != N.getValueType())
V = DAG.getBitcast(N.getValueType(), V);
// Return the new chain to replace N.
return V;
}
// Attempt to commute shufps LHS loads:
// permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
SelectionDAG &DAG) {
// TODO: Add vXf64 support.
if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32)
return SDValue();
// SHUFP(LHS, RHS) -> SHUFP(RHS, LHS) iff LHS is foldable + RHS is not.
auto commuteSHUFP = [&VT, &DL, &DAG](SDValue Parent, SDValue V) {
if (V.getOpcode() != X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode()))
return SDValue();
SDValue N0 = V.getOperand(0);
SDValue N1 = V.getOperand(1);
unsigned Imm = V.getConstantOperandVal(2);
const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
return SDValue();
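// Swap the two nibbles of the SHUFP immediate so each half of the selection
// now refers to the swapped operand order.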
Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
DAG.getTargetConstant(Imm, DL, MVT::i8));
};
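// Commuting the inner SHUFP moves elements between the low and high halves of
// its result within each 128-bit lane, so the outer shuffle immediates below
// are adjusted (via the XOR masks) to keep referencing the same elements.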
switch (N.getOpcode()) {
case X86ISD::VPERMILPI:
if (SDValue NewSHUFP = commuteSHUFP(N, N.getOperand(0))) {
unsigned Imm = N.getConstantOperandVal(1);
return DAG.getNode(X86ISD::VPERMILPI, DL, VT, NewSHUFP,
DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8));
}
break;
case X86ISD::SHUFP: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
unsigned Imm = N.getConstantOperandVal(2);
if (N0 == N1) {
if (SDValue NewSHUFP = commuteSHUFP(N, N0))
return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, NewSHUFP,
DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8));
} else if (SDValue NewSHUFP = commuteSHUFP(N, N0)) {
return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, N1,
DAG.getTargetConstant(Imm ^ 0x0A, DL, MVT::i8));
} else if (SDValue NewSHUFP = commuteSHUFP(N, N1)) {
return DAG.getNode(X86ISD::SHUFP, DL, VT, N0, NewSHUFP,
DAG.getTargetConstant(Imm ^ 0xA0, DL, MVT::i8));
}
break;
}
}
return SDValue();
}
// Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
const SDLoc &DL) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT ShuffleVT = N.getValueType();
auto IsMergeableWithShuffle = [&DAG](SDValue Op, bool FoldLoad = false) {
// AllZeros/AllOnes constants are freely shuffled and will peek through
// bitcasts. Other constant build vectors do not peek through bitcasts. Only
// merge with target shuffles if it has one use so shuffle combining is
// likely to kick in. Shuffles of splats are expected to be removed.
return ISD::isBuildVectorAllOnes(Op.getNode()) ||
ISD::isBuildVectorAllZeros(Op.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
(isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
(FoldLoad && isShuffleFoldableLoad(Op)) ||
DAG.isSplatValue(Op, /*AllowUndefs*/ false);
};
auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) {
// Ensure we only shuffle whole vector src elements, unless it's a logical
// binop where we can more aggressively move shuffles from dst to src.
return BinOp == ISD::AND || BinOp == ISD::OR || BinOp == ISD::XOR ||
BinOp == X86ISD::ANDNP ||
(Op.getScalarValueSizeInBits() <= ShuffleVT.getScalarSizeInBits());
};
unsigned Opc = N.getOpcode();
switch (Opc) {
// Unary and Unary+Permute Shuffles.
case X86ISD::PSHUFB: {
// Don't merge PSHUFB if it contains zero'd elements.
SmallVector<int> Mask;
SmallVector<SDValue> Ops;
if (!getTargetShuffleMask(N.getNode(), ShuffleVT.getSimpleVT(), false, Ops,
Mask))
break;
[[fallthrough]];
}
case X86ISD::VBROADCAST:
case X86ISD::MOVDDUP:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::VPERMI:
case X86ISD::VPERMILPI: {
if (N.getOperand(0).getValueType() == ShuffleVT &&
N->isOnlyUserOf(N.getOperand(0).getNode())) {
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
unsigned SrcOpcode = N0.getOpcode();
if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
if (IsMergeableWithShuffle(Op00, Opc != X86ISD::PSHUFB) ||
IsMergeableWithShuffle(Op01, Opc != X86ISD::PSHUFB)) {
SDValue LHS, RHS;
Op00 = DAG.getBitcast(ShuffleVT, Op00);
Op01 = DAG.getBitcast(ShuffleVT, Op01);
if (N.getNumOperands() == 2) {
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1));
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1));
} else {
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00);
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01);
}
EVT OpVT = N0.getValueType();
return DAG.getBitcast(ShuffleVT,
DAG.getNode(SrcOpcode, DL, OpVT,
DAG.getBitcast(OpVT, LHS),
DAG.getBitcast(OpVT, RHS)));
}
}
}
break;
}
// Binary and Binary+Permute Shuffles.
case X86ISD::INSERTPS: {
// Don't merge INSERTPS if it contains zero'd elements.
unsigned InsertPSMask = N.getConstantOperandVal(2);
unsigned ZeroMask = InsertPSMask & 0xF;
if (ZeroMask != 0)
break;
[[fallthrough]];
}
case X86ISD::MOVSD:
case X86ISD::MOVSS:
case X86ISD::BLENDI:
case X86ISD::SHUFP:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL: {
if (N->isOnlyUserOf(N.getOperand(0).getNode()) &&
N->isOnlyUserOf(N.getOperand(1).getNode())) {
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
unsigned SrcOpcode = N0.getOpcode();
if (TLI.isBinOp(SrcOpcode) && N1.getOpcode() == SrcOpcode &&
IsSafeToMoveShuffle(N0, SrcOpcode) &&
IsSafeToMoveShuffle(N1, SrcOpcode)) {
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0));
SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
SDValue Op11 = peekThroughOneUseBitcasts(N1.getOperand(1));
// Ensure the total number of shuffles doesn't increase by folding this
// shuffle through to the source ops.
if (((IsMergeableWithShuffle(Op00) && IsMergeableWithShuffle(Op10)) ||
(IsMergeableWithShuffle(Op01) && IsMergeableWithShuffle(Op11))) ||
((IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op10)) &&
(IsMergeableWithShuffle(Op01) || IsMergeableWithShuffle(Op11)))) {
SDValue LHS, RHS;
Op00 = DAG.getBitcast(ShuffleVT, Op00);
Op10 = DAG.getBitcast(ShuffleVT, Op10);
Op01 = DAG.getBitcast(ShuffleVT, Op01);
Op11 = DAG.getBitcast(ShuffleVT, Op11);
if (N.getNumOperands() == 3) {
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2));
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11, N.getOperand(2));
} else {
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10);
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11);
}
EVT OpVT = N0.getValueType();
return DAG.getBitcast(ShuffleVT,
DAG.getNode(SrcOpcode, DL, OpVT,
DAG.getBitcast(OpVT, LHS),
DAG.getBitcast(OpVT, RHS)));
}
}
}
break;
}
}
return SDValue();
}
/// Attempt to fold vpermf128(op(),op()) -> op(vpermf128(),vpermf128()).
static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
SelectionDAG &DAG,
const SDLoc &DL) {
assert(V.getOpcode() == X86ISD::VPERM2X128 && "Unknown lane shuffle");
MVT VT = V.getSimpleValueType();
SDValue Src0 = peekThroughBitcasts(V.getOperand(0));
SDValue Src1 = peekThroughBitcasts(V.getOperand(1));
unsigned SrcOpc0 = Src0.getOpcode();
unsigned SrcOpc1 = Src1.getOpcode();
EVT SrcVT0 = Src0.getValueType();
EVT SrcVT1 = Src1.getValueType();
if (!Src1.isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1))
return SDValue();
switch (SrcOpc0) {
case X86ISD::MOVDDUP: {
SDValue LHS = Src0.getOperand(0);
SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0);
SDValue Res =
DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS, V.getOperand(2));
Res = DAG.getNode(SrcOpc0, DL, SrcVT0, Res);
return DAG.getBitcast(VT, Res);
}
case X86ISD::VPERMILPI:
// TODO: Handle v4f64 permutes with different low/high lane masks.
if (SrcVT0 == MVT::v4f64) {
uint64_t Mask = Src0.getConstantOperandVal(1);
if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
break;
}
[[fallthrough]];
case X86ISD::VSHLI:
case X86ISD::VSRLI:
case X86ISD::VSRAI:
case X86ISD::PSHUFD:
if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
SDValue LHS = Src0.getOperand(0);
SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0);
SDValue Res = DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS,
V.getOperand(2));
Res = DAG.getNode(SrcOpc0, DL, SrcVT0, Res, Src0.getOperand(1));
return DAG.getBitcast(VT, Res);
}
break;
}
return SDValue();
}
/// Try to combine x86 target specific shuffles.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
return R;
// Handle specific target shuffles.
switch (Opcode) {
case X86ISD::MOVDDUP: {
SDValue Src = N.getOperand(0);
// Turn a 128-bit MOVDDUP of a full vector load into movddup+vzload.
if (VT == MVT::v2f64 && Src.hasOneUse() &&
ISD::isNormalLoad(Src.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src);
if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::f64, MVT::v2f64, DAG)) {
SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad);
DCI.CombineTo(N.getNode(), Movddup);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return N; // Return N so it doesn't get rechecked!
}
}
return SDValue();
}
case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
SDValue BC = peekThroughBitcasts(Src);
EVT SrcVT = Src.getValueType();
EVT BCVT = BC.getValueType();
// If broadcasting from another shuffle, attempt to simplify it.
// TODO - we really need a general SimplifyDemandedVectorElts mechanism.
if (isTargetShuffle(BC.getOpcode()) &&
VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {
unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
SmallVector<int, 16> DemandedMask(BCVT.getVectorNumElements(),
SM_SentinelUndef);
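// The broadcast only reads the low 'Scale' elements of the inner shuffle (the
// ones making up the broadcasted element), so leave the rest undef in the mask.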
for (unsigned i = 0; i != Scale; ++i)
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
X86::MaxShuffleCombineDepth,
/*HasVarMask*/ false, /*AllowCrossLaneVarMask*/ true,
/*AllowPerLaneVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
// broadcast(bitcast(src)) -> bitcast(broadcast(src))
// 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
if (Src.getOpcode() == ISD::BITCAST &&
SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits() &&
DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
FixedVectorType::isValidElementType(
BCVT.getScalarType().getTypeForEVT(*DAG.getContext()))) {
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
VT.getVectorNumElements());
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
}
// vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(src))
// If we're re-broadcasting a smaller type then broadcast with that type and
// bitcast.
// TODO: Do this for any splat?
if (Src.getOpcode() == ISD::BITCAST &&
(BC.getOpcode() == X86ISD::VBROADCAST ||
BC.getOpcode() == X86ISD::VBROADCAST_LOAD) &&
(VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits()) == 0 &&
(VT.getSizeInBits() % BCVT.getSizeInBits()) == 0) {
MVT NewVT =
MVT::getVectorVT(BCVT.getSimpleVT().getScalarType(),
VT.getSizeInBits() / BCVT.getScalarSizeInBits());
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
}
// Reduce broadcast source vector to lowest 128-bits.
if (SrcVT.getSizeInBits() > 128)
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
extract128BitVector(Src, 0, DAG, DL));
// broadcast(scalar_to_vector(x)) -> broadcast(x).
if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
// broadcast(extract_vector_elt(x, 0)) -> broadcast(x).
if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(Src.getOperand(1)) &&
DAG.getTargetLoweringInfo().isTypeLegal(
Src.getOperand(0).getValueType()))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
// Share broadcast with the longest vector and extract low subvector (free).
// Ensure the same SDValue from the SDNode use is being used.
for (SDNode *User : Src->uses())
if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
Src == User->getOperand(0) &&
User->getValueSizeInBits(0).getFixedValue() >
VT.getFixedSizeInBits()) {
return extractSubVector(SDValue(User, 0), 0, DAG, DL,
VT.getSizeInBits());
}
// vbroadcast(scalarload X) -> vbroadcast_load X
// For float loads, extract other uses of the scalar from the broadcast.
if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) &&
ISD::isNormalLoad(Src.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceExtract = Src.hasOneUse();
DCI.CombineTo(N.getNode(), BcastLd);
if (NoReplaceExtract) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
} else {
SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd,
DAG.getIntPtrConstant(0, DL));
DCI.CombineTo(LN, Scl, BcastLd.getValue(1));
}
return N; // Return N so it doesn't get rechecked!
}
// Due to isTypeDesirableForOp, we won't always shrink a load truncated to
// i16. So shrink it ourselves if we can make a broadcast_load.
if (SrcVT == MVT::i16 && Src.getOpcode() == ISD::TRUNCATE &&
Src.hasOneUse() && Src.getOperand(0).hasOneUse()) {
assert(Subtarget.hasAVX2() && "Expected AVX2");
SDValue TruncIn = Src.getOperand(0);
// If this is a truncate of a non-extending load we can just narrow it to
// use a broadcast_load.
if (ISD::isNormalLoad(TruncIn.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(TruncIn);
// Unless it's volatile or atomic.
if (LN->isSimple()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
LN->getPointerInfo(), LN->getOriginalAlign(),
LN->getMemOperand()->getFlags());
DCI.CombineTo(N.getNode(), BcastLd);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
DCI.recursivelyDeleteUnusedNodes(Src.getNode());
return N; // Return N so it doesn't get rechecked!
}
}
// If this is a truncate of an i16 extload, we can directly replace it.
if (ISD::isUNINDEXEDLoad(Src.getOperand(0).getNode()) &&
ISD::isEXTLoad(Src.getOperand(0).getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
if (LN->getMemoryVT().getSizeInBits() == 16) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
DCI.CombineTo(N.getNode(), BcastLd);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
DCI.recursivelyDeleteUnusedNodes(Src.getNode());
return N; // Return N so it doesn't get rechecked!
}
}
// If this is a truncate of a load that has been shifted right, we can
// offset the pointer and use a narrower load.
if (TruncIn.getOpcode() == ISD::SRL &&
TruncIn.getOperand(0).hasOneUse() &&
isa<ConstantSDNode>(TruncIn.getOperand(1)) &&
ISD::isNormalLoad(TruncIn.getOperand(0).getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(TruncIn.getOperand(0));
unsigned ShiftAmt = TruncIn.getConstantOperandVal(1);
// Make sure the shift amount and the load size are divisible by 16.
// Don't do this if the load is volatile or atomic.
if (ShiftAmt % 16 == 0 && TruncIn.getValueSizeInBits() % 16 == 0 &&
LN->isSimple()) {
unsigned Offset = ShiftAmt / 8;
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(),
TypeSize::Fixed(Offset), DL);
SDValue Ops[] = { LN->getChain(), Ptr };
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
LN->getPointerInfo().getWithOffset(Offset),
LN->getOriginalAlign(),
LN->getMemOperand()->getFlags());
DCI.CombineTo(N.getNode(), BcastLd);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
DCI.recursivelyDeleteUnusedNodes(Src.getNode());
return N; // Return N so it doesn't get rechecked!
}
}
}
// vbroadcast(vzload X) -> vbroadcast_load X
if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) {
MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
DCI.CombineTo(N.getNode(), BcastLd);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return N; // Return N so it doesn't get rechecked!
}
}
// vbroadcast(vector load X) -> vbroadcast_load
if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 ||
SrcVT == MVT::v4i32) &&
Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src);
// Unless the load is volatile or atomic.
if (LN->isSimple()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SrcVT.getScalarType(),
LN->getPointerInfo(), LN->getOriginalAlign(),
LN->getMemOperand()->getFlags());
DCI.CombineTo(N.getNode(), BcastLd);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return N; // Return N so it doesn't get rechecked!
}
}
return SDValue();
}
case X86ISD::VZEXT_MOVL: {
SDValue N0 = N.getOperand(0);
// If this is a vzmovl of a full vector load, replace it with a vzload, unless
// the load is volatile.
if (N0.hasOneUse() && ISD::isNormalLoad(N0.getNode())) {
auto *LN = cast<LoadSDNode>(N0);
if (SDValue VZLoad =
narrowLoadToVZLoad(LN, VT.getVectorElementType(), VT, DAG)) {
DCI.CombineTo(N.getNode(), VZLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return N;
}
}
// If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast
// and can just use a VZEXT_LOAD.
// FIXME: Is there some way to do this with SimplifyDemandedVectorElts?
if (N0.hasOneUse() && N0.getOpcode() == X86ISD::VBROADCAST_LOAD) {
auto *LN = cast<MemSDNode>(N0);
if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue VZLoad =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
DCI.CombineTo(N.getNode(), VZLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return N;
}
}
// Turn (v2i64 (vzext_movl (scalar_to_vector (i64 X)))) into
// (v2i64 (bitcast (v4i32 (vzext_movl (scalar_to_vector (i32 (trunc X)))))))
// if the upper bits of the i64 are zero.
if (N0.hasOneUse() && N0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getValueType() == MVT::i64) {
SDValue In = N0.getOperand(0);
APInt Mask = APInt::getHighBitsSet(64, 32);
if (DAG.MaskedValueIsZero(In, Mask)) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In);
MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc);
SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec);
return DAG.getBitcast(VT, Movl);
}
}
// Load a scalar integer constant directly to XMM instead of transferring an
// immediate value from a GPR.
// vzext_movl (scalar_to_vector C) --> load [C,0...]
if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR) {
if (auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
// Create a vector constant - scalar constant followed by zeros.
EVT ScalarVT = N0.getOperand(0).getValueType();
Type *ScalarTy = ScalarVT.getTypeForEVT(*DAG.getContext());
unsigned NumElts = VT.getVectorNumElements();
Constant *Zero = ConstantInt::getNullValue(ScalarTy);
SmallVector<Constant *, 32> ConstantVec(NumElts, Zero);
ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue());
// Load the vector constant from constant pool.
MVT PVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue CP = DAG.getConstantPool(ConstantVector::get(ConstantVec), PVT);
MachinePointerInfo MPI =
MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
return DAG.getLoad(VT, DL, DAG.getEntryNode(), CP, MPI, Alignment,
MachineMemOperand::MOLoad);
}
}
// Pull subvector inserts into undef through VZEXT_MOVL by making it an
// insert into a zero vector. This helps get VZEXT_MOVL closer to
// scalar_to_vectors where 256/512 are canonicalized to an insert and a
// 128-bit scalar_to_vector. This reduces the number of isel patterns.
if (!DCI.isBeforeLegalizeOps() && N0.hasOneUse()) {
SDValue V = peekThroughOneUseBitcasts(N0);
if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(0).isUndef() &&
isNullConstant(V.getOperand(2))) {
SDValue In = V.getOperand(1);
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
In.getValueSizeInBits() /
VT.getScalarSizeInBits());
In = DAG.getBitcast(SubVT, In);
SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, SubVT, In);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
getZeroVector(VT, Subtarget, DAG, DL), Movl,
V.getOperand(2));
}
}
return SDValue();
}
case X86ISD::BLENDI: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
// blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types.
// TODO: Handle MVT::v16i16 repeated blend mask.
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
MVT SrcVT = N0.getOperand(0).getSimpleValueType();
if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
SrcVT.getScalarSizeInBits() >= 32) {
unsigned BlendMask = N.getConstantOperandVal(2);
unsigned Size = VT.getVectorNumElements();
unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
BlendMask = scaleVectorShuffleBlendMask(BlendMask, Size, Scale);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
N1.getOperand(0),
DAG.getTargetConstant(BlendMask, DL, MVT::i8)));
}
}
return SDValue();
}
case X86ISD::SHUFP: {
// Fold shufps(shuffle(x),shuffle(y)) -> shufps(x,y).
// This is a more relaxed shuffle combiner that can ignore oneuse limits.
// TODO: Support types other than v4f32.
if (VT == MVT::v4f32) {
bool Updated = false;
SmallVector<int> Mask;
SmallVector<SDValue> Ops;
if (getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask) &&
Ops.size() == 2) {
for (int i = 0; i != 2; ++i) {
SmallVector<SDValue> SubOps;
SmallVector<int> SubMask, SubScaledMask;
SDValue Sub = peekThroughBitcasts(Ops[i]);
// TODO: Scaling might be easier if we specify the demanded elts.
if (getTargetShuffleInputs(Sub, SubOps, SubMask, DAG, 0, false) &&
scaleShuffleElements(SubMask, 4, SubScaledMask) &&
SubOps.size() == 1 && isUndefOrInRange(SubScaledMask, 0, 4)) {
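// This operand supplies two of the SHUFPS result elements; compose their mask
// entries with the inner shuffle and retarget them at the inner source.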
int Ofs = i * 2;
Mask[Ofs + 0] = SubScaledMask[Mask[Ofs + 0] % 4] + (i * 4);
Mask[Ofs + 1] = SubScaledMask[Mask[Ofs + 1] % 4] + (i * 4);
Ops[i] = DAG.getBitcast(VT, SubOps[0]);
Updated = true;
}
}
}
if (Updated) {
for (int &M : Mask)
M %= 4;
Ops.push_back(getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
return DAG.getNode(X86ISD::SHUFP, DL, VT, Ops);
}
}
return SDValue();
}
case X86ISD::VPERMI: {
// vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements.
// TODO: Remove when we have preferred domains in combineX86ShuffleChain.
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if (N0.getOpcode() == ISD::BITCAST &&
N0.getOperand(0).getScalarValueSizeInBits() == EltSizeInBits) {
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1);
return DAG.getBitcast(VT, Res);
}
return SDValue();
}
case X86ISD::VPERM2X128: {
// Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() == ISD::BITCAST &&
(RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) {
EVT SrcVT = LHS.getOperand(0).getValueType();
if (RHS.isUndef() || SrcVT == RHS.getOperand(0).getValueType()) {
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT,
DAG.getBitcast(SrcVT, LHS),
DAG.getBitcast(SrcVT, RHS),
N->getOperand(2)));
}
}
// Fold vperm2x128(op(),op()) -> op(vperm2x128(),vperm2x128()).
if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL))
return Res;
// Fold vperm2x128 subvector shuffle with an inner concat pattern.
// vperm2x128(concat(X,Y),concat(Z,W)) --> concat X,Y etc.
auto FindSubVector128 = [&](unsigned Idx) {
if (Idx > 3)
return SDValue();
SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1));
SmallVector<SDValue> SubOps;
if (collectConcatOps(Src.getNode(), SubOps, DAG) && SubOps.size() == 2)
return SubOps[Idx & 1];
unsigned NumElts = Src.getValueType().getVectorNumElements();
if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
Src.getOperand(1).getValueSizeInBits() == 128 &&
Src.getConstantOperandAPInt(2) == (NumElts / 2)) {
return Src.getOperand(1);
}
return SDValue();
};
unsigned Imm = N.getConstantOperandVal(2);
if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
MVT SubVT = VT.getHalfNumVectorElementsVT();
SubLo = DAG.getBitcast(SubVT, SubLo);
SubHi = DAG.getBitcast(SubVT, SubHi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);
}
}
return SDValue();
}
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
case X86ISD::MOVSD:
case X86ISD::MOVSH:
case X86ISD::MOVSS: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
// Canonicalize scalar FPOps:
// MOVS*(N0, OP(N0, N1)) --> MOVS*(N0, SCALAR_TO_VECTOR(OP(N0[0], N1[0])))
// If commutable, allow OP(N1[0], N0[0]).
unsigned Opcode1 = N1.getOpcode();
if (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL || Opcode1 == ISD::FSUB ||
Opcode1 == ISD::FDIV) {
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
if (N10 == N0 ||
(N11 == N0 && (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL))) {
if (N10 != N0)
std::swap(N10, N11);
MVT SVT = VT.getVectorElementType();
SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
N10 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N10, ZeroIdx);
N11 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N11, ZeroIdx);
SDValue Scl = DAG.getNode(Opcode1, DL, SVT, N10, N11);
SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
return DAG.getNode(Opcode, DL, VT, N0, SclVec);
}
}
return SDValue();
}
case X86ISD::INSERTPS: {
assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32");
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
unsigned InsertPSMask = N.getConstantOperandVal(2);
unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
unsigned ZeroMask = InsertPSMask & 0xF;
// If we zero out all elements from Op0 then we don't need to reference it.
if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
// If we zero out the element from Op1 then we don't need to reference it.
if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
// Attempt to merge insertps Op1 with an inner target shuffle node.
SmallVector<int, 8> TargetMask1;
SmallVector<SDValue, 2> Ops1;
APInt KnownUndef1, KnownZero1;
if (getTargetShuffleAndZeroables(Op1, TargetMask1, Ops1, KnownUndef1,
KnownZero1)) {
if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) {
// Zero/UNDEF insertion - zero out element and remove dependency.
InsertPSMask |= (1u << DstIdx);
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
// Update insertps mask srcidx and reference the source input directly.
int M = TargetMask1[SrcIdx];
assert(0 <= M && M < 8 && "Shuffle index out of range");
InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
Op1 = Ops1[M < 4 ? 0 : 1];
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
// Attempt to merge insertps Op0 with an inner target shuffle node.
SmallVector<int, 8> TargetMask0;
SmallVector<SDValue, 2> Ops0;
APInt KnownUndef0, KnownZero0;
if (getTargetShuffleAndZeroables(Op0, TargetMask0, Ops0, KnownUndef0,
KnownZero0)) {
bool Updated = false;
bool UseInput00 = false;
bool UseInput01 = false;
for (int i = 0; i != 4; ++i) {
if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
// No change if element is already zero or the inserted element.
continue;
}
if (KnownUndef0[i] || KnownZero0[i]) {
// If the target mask is undef/zero then we must zero the element.
InsertPSMask |= (1u << i);
Updated = true;
continue;
}
// The input vector element must be inline.
int M = TargetMask0[i];
if (M != i && M != (i + 4))
return SDValue();
// Determine which inputs of the target shuffle we're using.
UseInput00 |= (0 <= M && M < 4);
UseInput01 |= (4 <= M);
}
// If we're not using both inputs of the target shuffle then use the
// referenced input directly.
if (UseInput00 && !UseInput01) {
Updated = true;
Op0 = Ops0[0];
} else if (!UseInput00 && UseInput01) {
Updated = true;
Op0 = Ops0[1];
}
if (Updated)
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
// If we're inserting an element from a vbroadcast load, fold the
// load into the X86insertps instruction. We need to convert the scalar
// load to a vector and clear the source lane of the INSERTPS control.
if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op1);
if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {
SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
MemIntr->getBasePtr(),
MemIntr->getMemOperand());
SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
Load),
DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
return Insert;
}
}
return SDValue();
}
default:
return SDValue();
}
// Nuke no-op shuffles that show up after combining.
if (isNoopShuffleMask(Mask))
return N.getOperand(0);
// Look for simplifications involving one or two shuffle instructions.
SDValue V = N.getOperand(0);
switch (N.getOpcode()) {
default:
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
if (ArrayRef(Mask).equals({2, 3, 0, 1})) {
int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
V = DAG.getBitcast(DVT, V);
V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DL, DAG));
return DAG.getBitcast(VT, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
// FIXME: This doesn't handle the location of the PSHUFD generically, and
// only works when we have a PSHUFD followed by two half-shuffles.
if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
(V.getOpcode() == X86ISD::PSHUFLW ||
V.getOpcode() == X86ISD::PSHUFHW) &&
V.getOpcode() != N.getOpcode() &&
V.hasOneUse() && V.getOperand(0).hasOneUse()) {
SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
if (D.getOpcode() == X86ISD::PSHUFD) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int WordMask[8];
for (int i = 0; i < 4; ++i) {
WordMask[i + NOffset] = Mask[i] + NOffset;
WordMask[i + VOffset] = VMask[i] + VOffset;
}
// Map the word mask through the DWord mask.
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
if (ArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
ArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
V = DAG.getBitcast(VT, D.getOperand(0));
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
DL, VT, V, V);
}
}
}
break;
case X86ISD::PSHUFD:
if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG))
return NewN;
break;
}
return SDValue();
}
/// Checks if the shuffle mask takes successive elements alternately from the
/// two input vectors.
/// For example <0, 5, 2, 7> or <8, 1, 10, 3, 12, 5, 14, 7> are both correct.
static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) {
int ParitySrc[2] = {-1, -1};
unsigned Size = Mask.size();
for (unsigned i = 0; i != Size; ++i) {
int M = Mask[i];
if (M < 0)
continue;
// Make sure we are using the matching element from the input.
if ((M % Size) != i)
return false;
// Make sure we use the same input for all elements of the same parity.
int Src = M / Size;
if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src)
return false;
ParitySrc[i % 2] = Src;
}
// Make sure each input is used.
if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1])
return false;
Op0Even = ParitySrc[0] == 0;
return true;
}
/// Returns true iff the shuffle node \p N can be replaced with an
/// ADDSUB(SUBADD) operation. If true is returned then the operands of the
/// ADDSUB(SUBADD) operation are written to the parameters \p Opnd0 and \p Opnd1.
///
/// We perform the ADDSUB(SUBADD) combine directly on the abstract vector
/// shuffle nodes, which makes them easier to match generically. We also insert
/// dummy vector shuffle nodes for the operands which explicitly discard the
/// lanes unused by this operation, so the rest of the combiner can see that
/// those lanes are unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
bool &IsSubAdd) {
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!Subtarget.hasSSE3() || !TLI.isTypeLegal(VT) ||
!VT.getSimpleVT().isFloatingPoint())
return false;
// We only handle target-independent shuffles.
// FIXME: It would be easy and harmless to use the target shuffle mask
// extraction tool to support more.
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
// Make sure we have an FADD and an FSUB.
if ((V1.getOpcode() != ISD::FADD && V1.getOpcode() != ISD::FSUB) ||
(V2.getOpcode() != ISD::FADD && V2.getOpcode() != ISD::FSUB) ||
V1.getOpcode() == V2.getOpcode())
return false;
// If there are other uses of these operations we can't fold them.
if (!V1->hasOneUse() || !V2->hasOneUse())
return false;
// Ensure that both operations have the same operands. Note that we can
// commute the FADD operands.
SDValue LHS, RHS;
if (V1.getOpcode() == ISD::FSUB) {
LHS = V1->getOperand(0); RHS = V1->getOperand(1);
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
return false;
} else {
assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode");
LHS = V2->getOperand(0); RHS = V2->getOperand(1);
if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) &&
(V1->getOperand(0) != RHS || V1->getOperand(1) != LHS))
return false;
}
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
bool Op0Even;
if (!isAddSubOrSubAddMask(Mask, Op0Even))
return false;
// It's a subadd if the vector in the even parity is an FADD.
IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
: V2->getOpcode() == ISD::FADD;
Opnd0 = LHS;
Opnd1 = RHS;
return true;
}
/// Combine shuffle of two fma nodes into FMAddSub or FMSubAdd.
static SDValue combineShuffleToFMAddSub(SDNode *N,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// We only handle target-independent shuffles.
// FIXME: It would be easy and harmless to use the target shuffle mask
// extraction tool to support more.
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
MVT VT = N->getSimpleValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!Subtarget.hasAnyFMA() || !TLI.isTypeLegal(VT))
return SDValue();
// We're trying to match (shuffle fma(a, b, c), X86Fmsub(a, b, c)).
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue FMAdd = Op0, FMSub = Op1;
if (FMSub.getOpcode() != X86ISD::FMSUB)
std::swap(FMAdd, FMSub);
if (FMAdd.getOpcode() != ISD::FMA || FMSub.getOpcode() != X86ISD::FMSUB ||
FMAdd.getOperand(0) != FMSub.getOperand(0) || !FMAdd.hasOneUse() ||
FMAdd.getOperand(1) != FMSub.getOperand(1) || !FMSub.hasOneUse() ||
FMAdd.getOperand(2) != FMSub.getOperand(2))
return SDValue();
// Check for correct shuffle mask.
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
bool Op0Even;
if (!isAddSubOrSubAddMask(Mask, Op0Even))
return SDValue();
// FMAddSub takes zeroth operand from FMSub node.
SDLoc DL(N);
bool IsSubAdd = Op0Even ? Op0 == FMAdd : Op1 == FMAdd;
unsigned Opcode = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1),
FMAdd.getOperand(2));
}
/// Try to combine a shuffle into a target-specific add-sub or
/// mul-add-sub node.
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (SDValue V = combineShuffleToFMAddSub(N, Subtarget, DAG))
return V;
SDValue Opnd0, Opnd1;
bool IsSubAdd;
if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
return SDValue();
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
}
if (IsSubAdd)
return SDValue();
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with 512-bit ADDSUB instructions!
if (VT.is512BitVector())
return SDValue();
// Do not generate X86ISD::ADDSUB node for FP16's vector types even though
// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with FP16 ADDSUB instructions!
if (VT.getVectorElementType() == MVT::f16)
return SDValue();
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
// We are looking for a shuffle where both sources are concatenated with undef
// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
// if we can express this as a single-source shuffle, that's preferable.
static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N))
return SDValue();
EVT VT = N->getValueType(0);
// We only care about shuffles of 128/256-bit vectors of 32/64-bit values.
if (!VT.is128BitVector() && !VT.is256BitVector())
return SDValue();
if (VT.getVectorElementType() != MVT::i32 &&
VT.getVectorElementType() != MVT::i64 &&
VT.getVectorElementType() != MVT::f32 &&
VT.getVectorElementType() != MVT::f64)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Check that both sources are concats with undef.
if (N0.getOpcode() != ISD::CONCAT_VECTORS ||
N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() ||
!N1.getOperand(1).isUndef())
return SDValue();
// Construct the new shuffle mask. Elements from the first source retain their
// index, but elements from the second source no longer need to skip an undef.
SmallVector<int, 8> Mask;
int NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
for (int Elt : SVOp->getMask())
Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2));
SDLoc DL(N);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),
N1.getOperand(0));
return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);
}
/// If we have a shuffle of AVX/AVX512 (256/512 bit) vectors that only uses the
/// low half of each source vector and does not set any high half elements in
/// the destination vector, narrow the shuffle to half its original size.
static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
if (!Shuf->getValueType(0).isSimple())
return SDValue();
MVT VT = Shuf->getSimpleValueType(0);
if (!VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
// See if we can ignore all of the high elements of the shuffle.
ArrayRef<int> Mask = Shuf->getMask();
if (!isUndefUpperHalf(Mask))
return SDValue();
// Check if the shuffle mask accesses only the low half of each input vector
// (half-index output is 0 or 2).
int HalfIdx1, HalfIdx2;
SmallVector<int, 8> HalfMask(Mask.size() / 2);
if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2) ||
(HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1))
return SDValue();
// Create a half-width shuffle to replace the unnecessarily wide shuffle.
// The trick is knowing that all of the insert/extract are actually free
// subregister (zmm<->ymm or ymm<->xmm) ops. That leaves us with a shuffle
// of narrow inputs into a narrow output, and that is always cheaper than
// the wide shuffle that we started with.
return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
Shuf->getOperand(1), HalfMask, HalfIdx1,
HalfIdx2, false, DAG, /*UseConcat*/true);
}
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N))
if (SDValue V = narrowShuffle(Shuf, DAG))
return V;
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB, FMADDSUB, or FMSUBADD node.
SDLoc dl(N);
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isTypeLegal(VT))
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
// Attempt to combine into a vector load/broadcast.
if (SDValue LD = combineToConsecutiveLoads(
VT, SDValue(N, 0), dl, DAG, Subtarget, /*IsAfterLegalize*/ true))
return LD;
// For AVX2, we sometimes want to combine
// (vector_shuffle <mask> (concat_vectors t1, undef)
// (concat_vectors t2, undef))
// Into:
// (vector_shuffle <mask> (concat_vectors t1, t2), undef)
// Since the latter can be efficiently lowered with VPERMD/VPERMQ
if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))
return ShufConcat;
if (isTargetShuffle(N->getOpcode())) {
SDValue Op(N, 0);
if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget))
return Shuffle;
// Try recursively combining arbitrary sequences of x86 shuffle
// instructions into higher-order shuffles. We do this after combining
// specific PSHUF instruction sequences into their minimal form so that we
// can evaluate how many specialized shuffle instructions are involved in
// a particular chain.
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
// Simplify source operands based on shuffle mask.
// TODO - merge this into combineX86ShufflesRecursively.
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, DCI))
return SDValue(N, 0);
// Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
// Perform this after other shuffle combines to allow inner shuffles to be
// combined away first.
if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, dl))
return BinOp;
}
return SDValue();
}
// Simplify variable target shuffle masks based on the demanded elements.
// TODO: Handle DemandedBits in mask indices as well?
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
SDValue Op, const APInt &DemandedElts, unsigned MaskIndex,
TargetLowering::TargetLoweringOpt &TLO, unsigned Depth) const {
// If we're demanding all elements don't bother trying to simplify the mask.
unsigned NumElts = DemandedElts.getBitWidth();
if (DemandedElts.isAllOnes())
return false;
SDValue Mask = Op.getOperand(MaskIndex);
if (!Mask.hasOneUse())
return false;
// Attempt to generically simplify the variable shuffle mask.
APInt MaskUndef, MaskZero;
if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
Depth + 1))
return true;
// Attempt to extract+simplify a (constant pool load) shuffle mask.
// TODO: Support other types from getTargetShuffleMaskIndices?
SDValue BC = peekThroughOneUseBitcasts(Mask);
EVT BCVT = BC.getValueType();
auto *Load = dyn_cast<LoadSDNode>(BC);
if (!Load)
return false;
const Constant *C = getTargetConstantFromNode(Load);
if (!C)
return false;
Type *CTy = C->getType();
if (!CTy->isVectorTy() ||
CTy->getPrimitiveSizeInBits() != Mask.getValueSizeInBits())
return false;
// Handle scaling for i64 elements on 32-bit targets.
unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements();
if (NumCstElts != NumElts && NumCstElts != (NumElts * 2))
return false;
unsigned Scale = NumCstElts / NumElts;
// Simplify mask if we have an undemanded element that is not undef.
bool Simplified = false;
SmallVector<Constant *, 32> ConstVecOps;
for (unsigned i = 0; i != NumCstElts; ++i) {
Constant *Elt = C->getAggregateElement(i);
if (!DemandedElts[i / Scale] && !isa<UndefValue>(Elt)) {
ConstVecOps.push_back(UndefValue::get(Elt->getType()));
Simplified = true;
continue;
}
ConstVecOps.push_back(Elt);
}
if (!Simplified)
return false;
// Generate new constant pool entry + legalize immediately for the load.
SDLoc DL(Op);
SDValue CV = TLO.DAG.getConstantPool(ConstantVector::get(ConstVecOps), BCVT);
SDValue LegalCV = LowerConstantPool(CV, TLO.DAG);
SDValue NewMask = TLO.DAG.getLoad(
BCVT, DL, TLO.DAG.getEntryNode(), LegalCV,
MachinePointerInfo::getConstantPool(TLO.DAG.getMachineFunction()),
Load->getAlign());
return TLO.CombineTo(Mask, TLO.DAG.getBitcast(Mask.getValueType(), NewMask));
}
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
TargetLoweringOpt &TLO, unsigned Depth) const {
int NumElts = DemandedElts.getBitWidth();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
// Handle special case opcodes.
switch (Opc) {
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ: {
APInt LHSUndef, LHSZero;
APInt RHSUndef, RHSZero;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
Depth + 1))
return true;
if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
Depth + 1))
return true;
// Multiply by zero.
KnownZero = LHSZero | RHSZero;
break;
}
case X86ISD::VPMADDWD: {
APInt LHSUndef, LHSZero;
APInt RHSUndef, RHSZero;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, 2 * NumElts);
if (SimplifyDemandedVectorElts(LHS, DemandedSrcElts, LHSUndef, LHSZero, TLO,
Depth + 1))
return true;
if (SimplifyDemandedVectorElts(RHS, DemandedSrcElts, RHSUndef, RHSZero, TLO,
Depth + 1))
return true;
// TODO: Multiply by zero.
// If RHS/LHS elements are known zero then we don't need the LHS/RHS equivalent.
APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero;
if (SimplifyDemandedVectorElts(LHS, DemandedLHSElts, LHSUndef, LHSZero, TLO,
Depth + 1))
return true;
APInt DemandedRHSElts = DemandedSrcElts & ~LHSZero;
if (SimplifyDemandedVectorElts(RHS, DemandedRHSElts, RHSUndef, RHSZero, TLO,
Depth + 1))
return true;
break;
}
case X86ISD::PSADBW: {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
assert(VT.getScalarType() == MVT::i64 &&
LHS.getValueType() == RHS.getValueType() &&
LHS.getValueType().getScalarType() == MVT::i8 &&
"Unexpected PSADBW types");
// Aggressively peek through ops to get at the demanded elts.
if (!DemandedElts.isAllOnes()) {
unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
SDValue NewLHS = SimplifyMultipleUseDemandedVectorElts(
LHS, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewRHS = SimplifyMultipleUseDemandedVectorElts(
RHS, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewLHS || NewRHS) {
NewLHS = NewLHS ? NewLHS : LHS;
NewRHS = NewRHS ? NewRHS : RHS;
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));
}
}
break;
}
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA: {
// We only need the bottom 64-bits of the (128-bit) shift amount.
SDValue Amt = Op.getOperand(1);
MVT AmtVT = Amt.getSimpleValueType();
assert(AmtVT.is128BitVector() && "Unexpected value type");
// If the shift amount is only ever used as an SSE shift amount then we know
// that only its bottom 64-bits are ever used.
bool AssumeSingleUse = llvm::all_of(Amt->uses(), [&Amt](SDNode *Use) {
unsigned UseOpc = Use->getOpcode();
return (UseOpc == X86ISD::VSHL || UseOpc == X86ISD::VSRL ||
UseOpc == X86ISD::VSRA) &&
Use->getOperand(0) != Amt;
});
APInt AmtUndef, AmtZero;
unsigned NumAmtElts = AmtVT.getVectorNumElements();
APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
Depth + 1, AssumeSingleUse))
return true;
[[fallthrough]];
}
case X86ISD::VSHLI:
case X86ISD::VSRLI:
case X86ISD::VSRAI: {
SDValue Src = Op.getOperand(0);
APInt SrcUndef;
if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO,
Depth + 1))
return true;
// Fold shift(0,x) -> 0
if (DemandedElts.isSubsetOf(KnownZero))
return TLO.CombineTo(
Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
// Aggressively peek through ops to get at the demanded elts.
if (!DemandedElts.isAllOnes())
if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1)));
break;
}
case X86ISD::VPSHA:
case X86ISD::VPSHL:
case X86ISD::VSHLV:
case X86ISD::VSRLV:
case X86ISD::VSRAV: {
APInt LHSUndef, LHSZero;
APInt RHSUndef, RHSZero;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
Depth + 1))
return true;
// Fold shift(0,x) -> 0
if (DemandedElts.isSubsetOf(LHSZero))
return TLO.CombineTo(
Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
Depth + 1))
return true;
KnownZero = LHSZero;
break;
}
case X86ISD::KSHIFTL: {
SDValue Src = Op.getOperand(0);
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
unsigned ShiftAmt = Amt->getZExtValue();
if (ShiftAmt == 0)
return TLO.CombineTo(Op, Src);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
if (Src.getOpcode() == X86ISD::KSHIFTR) {
if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) {
unsigned C1 = Src.getConstantOperandVal(1);
unsigned NewOpc = X86ISD::KSHIFTL;
int Diff = ShiftAmt - C1;
if (Diff < 0) {
Diff = -Diff;
NewOpc = X86ISD::KSHIFTR;
}
SDLoc dl(Op);
SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
return TLO.CombineTo(
Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
}
}
APInt DemandedSrc = DemandedElts.lshr(ShiftAmt);
if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
Depth + 1))
return true;
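// Propagate the source's known undef/zero lanes through the shift; the low
// lanes shifted in are known zero.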
KnownUndef <<= ShiftAmt;
KnownZero <<= ShiftAmt;
KnownZero.setLowBits(ShiftAmt);
break;
}
case X86ISD::KSHIFTR: {
SDValue Src = Op.getOperand(0);
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
unsigned ShiftAmt = Amt->getZExtValue();
if (ShiftAmt == 0)
return TLO.CombineTo(Op, Src);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted
// out) are never demanded.
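// Illustrative example: with C1 == 2 and ShAmt == 5,
// (kshiftr (kshiftl X, 2), 5) becomes (kshiftr X, 3), which is valid as
// long as the high 5 elements are not demanded.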
if (Src.getOpcode() == X86ISD::KSHIFTL) {
if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) {
unsigned C1 = Src.getConstantOperandVal(1);
unsigned NewOpc = X86ISD::KSHIFTR;
int Diff = ShiftAmt - C1;
if (Diff < 0) {
Diff = -Diff;
NewOpc = X86ISD::KSHIFTL;
}
SDLoc dl(Op);
SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
return TLO.CombineTo(
Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
}
}
APInt DemandedSrc = DemandedElts.shl(ShiftAmt);
if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
Depth + 1))
return true;
KnownUndef.lshrInPlace(ShiftAmt);
KnownZero.lshrInPlace(ShiftAmt);
KnownZero.setHighBits(ShiftAmt);
break;
}
case X86ISD::ANDNP: {
// ANDNP = (~LHS & RHS);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
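// Helper: given the constant bits of one operand, compute which bits and
// elements of the other operand can still affect (~LHS & RHS). A lane where
// RHS is known zero (or LHS is known all-ones) contributes nothing, and in
// live lanes only the bit positions where RHS (or ~LHS) may be one are
// demanded.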
auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
APInt UndefElts;
SmallVector<APInt> EltBits;
int NumElts = VT.getVectorNumElements();
int EltSizeInBits = VT.getScalarSizeInBits();
APInt OpBits = APInt::getAllOnes(EltSizeInBits);
APInt OpElts = DemandedElts;
if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
EltBits)) {
OpBits.clearAllBits();
OpElts.clearAllBits();
for (int I = 0; I != NumElts; ++I) {
if (!DemandedElts[I])
continue;
if (UndefElts[I]) {
// We can't assume an undef src element gives an undef dst - the
// other src might be zero.
OpBits.setAllBits();
OpElts.setBit(I);
} else if ((Invert && !EltBits[I].isAllOnes()) ||
(!Invert && !EltBits[I].isZero())) {
OpBits |= Invert ? ~EltBits[I] : EltBits[I];
OpElts.setBit(I);
}
}
}
return std::make_pair(OpBits, OpElts);
};
APInt BitsLHS, EltsLHS;
APInt BitsRHS, EltsRHS;
std::tie(BitsLHS, EltsLHS) = GetDemandedMasks(RHS);
std::tie(BitsRHS, EltsRHS) = GetDemandedMasks(LHS, true);
APInt LHSUndef, LHSZero;
APInt RHSUndef, RHSZero;
if (SimplifyDemandedVectorElts(LHS, EltsLHS, LHSUndef, LHSZero, TLO,
Depth + 1))
return true;
if (SimplifyDemandedVectorElts(RHS, EltsRHS, RHSUndef, RHSZero, TLO,
Depth + 1))
return true;
if (!DemandedElts.isAllOnes()) {
SDValue NewLHS = SimplifyMultipleUseDemandedBits(LHS, BitsLHS, EltsLHS,
TLO.DAG, Depth + 1);
SDValue NewRHS = SimplifyMultipleUseDemandedBits(RHS, BitsRHS, EltsRHS,
TLO.DAG, Depth + 1);
if (NewLHS || NewRHS) {
NewLHS = NewLHS ? NewLHS : LHS;
NewRHS = NewRHS ? NewRHS : RHS;
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));
}
}
break;
}
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
APInt SrcUndef, SrcZero;
APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
break;
}
case X86ISD::PACKSS:
case X86ISD::PACKUS: {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
APInt DemandedLHS, DemandedRHS;
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
APInt LHSUndef, LHSZero;
if (SimplifyDemandedVectorElts(N0, DemandedLHS, LHSUndef, LHSZero, TLO,
Depth + 1))
return true;
APInt RHSUndef, RHSZero;
if (SimplifyDemandedVectorElts(N1, DemandedRHS, RHSUndef, RHSZero, TLO,
Depth + 1))
return true;
// TODO - pass on known zero/undef.
// Aggressively peek through ops to get at the demanded elts.
// TODO - we should do this for all target/faux shuffles ops.
if (!DemandedElts.isAllOnes()) {
SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
TLO.DAG, Depth + 1);
SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
TLO.DAG, Depth + 1);
if (NewN0 || NewN1) {
NewN0 = NewN0 ? NewN0 : N0;
NewN1 = NewN1 ? NewN1 : N1;
return TLO.CombineTo(Op,
TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));
}
}
break;
}
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
case X86ISD::FHSUB: {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
APInt DemandedLHS, DemandedRHS;
getHorizDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
APInt LHSUndef, LHSZero;
if (SimplifyDemandedVectorElts(N0, DemandedLHS, LHSUndef, LHSZero, TLO,
Depth + 1))
return true;
APInt RHSUndef, RHSZero;
if (SimplifyDemandedVectorElts(N1, DemandedRHS, RHSUndef, RHSZero, TLO,
Depth + 1))
return true;
// TODO - pass on known zero/undef.
// Aggressively peek through ops to get at the demanded elts.
// TODO: Handle repeated operands.
if (N0 != N1 && !DemandedElts.isAllOnes()) {
SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
TLO.DAG, Depth + 1);
SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
TLO.DAG, Depth + 1);
if (NewN0 || NewN1) {
NewN0 = NewN0 ? NewN0 : N0;
NewN1 = NewN1 ? NewN1 : N1;
return TLO.CombineTo(Op,
TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));
}
}
break;
}
case X86ISD::VTRUNC:
case X86ISD::VTRUNCS:
case X86ISD::VTRUNCUS: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrc, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
break;
}
case X86ISD::BLENDV: {
APInt SelUndef, SelZero;
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, SelUndef,
SelZero, TLO, Depth + 1))
return true;
// TODO: Use SelZero to adjust LHS/RHS DemandedElts.
APInt LHSUndef, LHSZero;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, LHSUndef,
LHSZero, TLO, Depth + 1))
return true;
APInt RHSUndef, RHSZero;
if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedElts, RHSUndef,
RHSZero, TLO, Depth + 1))
return true;
KnownZero = LHSZero & RHSZero;
KnownUndef = LHSUndef & RHSUndef;
break;
}
case X86ISD::VZEXT_MOVL: {
// If upper demanded elements are already zero then we have nothing to do.
SDValue Src = Op.getOperand(0);
APInt DemandedUpperElts = DemandedElts;
DemandedUpperElts.clearLowBits(1);
if (TLO.DAG.MaskedVectorIsZero(Src, DemandedUpperElts, Depth + 1))
return TLO.CombineTo(Op, Src);
break;
}
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (!SrcVT.isVector())
break;
// Don't bother broadcasting if we just need the 0'th element.
if (DemandedElts == 1) {
if (Src.getValueType() != VT)
Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
SDLoc(Op));
return TLO.CombineTo(Op, Src);
}
APInt SrcUndef, SrcZero;
APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
// Aggressively peek through src to get at the demanded elt.
// TODO - we should do this for all target/faux shuffles ops.
if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, SrcElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
break;
}
case X86ISD::VPERMV:
if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 0, TLO,
Depth))
return true;
break;
case X86ISD::PSHUFB:
case X86ISD::VPERMV3:
case X86ISD::VPERMILPV:
if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 1, TLO,
Depth))
return true;
break;
case X86ISD::VPPERM:
case X86ISD::VPERMIL2:
if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 2, TLO,
Depth))
return true;
break;
}
// For 256/512-bit ops that are 128/256-bit ops glued together, if we do not
// demand any of the high elements, then narrow the op to 128/256-bits: e.g.
// (op ymm0, ymm1) --> insert undef, (op xmm0, xmm1), 0
if ((VT.is256BitVector() || VT.is512BitVector()) &&
DemandedElts.lshr(NumElts / 2) == 0) {
unsigned SizeInBits = VT.getSizeInBits();
unsigned ExtSizeInBits = SizeInBits / 2;
// See if 512-bit ops only use the bottom 128-bits.
if (VT.is512BitVector() && DemandedElts.lshr(NumElts / 4) == 0)
ExtSizeInBits = SizeInBits / 4;
switch (Opc) {
// Scalar broadcast.
case X86ISD::VBROADCAST: {
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
if (Src.getValueSizeInBits() > ExtSizeInBits)
Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
ExtSizeInBits / VT.getScalarSizeInBits());
SDValue Bcst = TLO.DAG.getNode(X86ISD::VBROADCAST, DL, BcstVT, Src);
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
TLO.DAG, DL, ExtSizeInBits));
}
case X86ISD::VBROADCAST_LOAD: {
SDLoc DL(Op);
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
ExtSizeInBits / VT.getScalarSizeInBits());
SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other);
SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
SDValue Bcst = TLO.DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(),
MemIntr->getMemOperand());
TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
Bcst.getValue(1));
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
TLO.DAG, DL, ExtSizeInBits));
}
// Subvector broadcast.
case X86ISD::SUBV_BROADCAST_LOAD: {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
EVT MemVT = MemIntr->getMemoryVT();
if (ExtSizeInBits == MemVT.getStoreSizeInBits()) {
SDLoc DL(Op);
SDValue Ld =
TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(),
MemIntr->getBasePtr(), MemIntr->getMemOperand());
TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
Ld.getValue(1));
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0,
TLO.DAG, DL, ExtSizeInBits));
} else if ((ExtSizeInBits % MemVT.getStoreSizeInBits()) == 0) {
SDLoc DL(Op);
EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
ExtSizeInBits / VT.getScalarSizeInBits());
if (SDValue BcstLd =
getBROADCAST_LOAD(Opc, DL, BcstVT, MemVT, MemIntr, 0, TLO.DAG))
return TLO.CombineTo(Op,
insertSubVector(TLO.DAG.getUNDEF(VT), BcstLd, 0,
TLO.DAG, DL, ExtSizeInBits));
}
break;
}
// Byte shifts by immediate.
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
// Shift by uniform.
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA:
// Shift by immediate.
case X86ISD::VSHLI:
case X86ISD::VSRLI:
case X86ISD::VSRAI: {
SDLoc DL(Op);
SDValue Ext0 =
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue ExtOp =
TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1));
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
case X86ISD::VPERMI: {
// Simplify PERMPD/PERMQ to extract_subvector.
// TODO: This should be done in shuffle combining.
if (VT == MVT::v4f64 || VT == MVT::v4i64) {
SmallVector<int, 4> Mask;
DecodeVPERMMask(NumElts, Op.getConstantOperandVal(1), Mask);
if (isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3)) {
SDLoc DL(Op);
SDValue Ext = extractSubVector(Op.getOperand(0), 2, TLO.DAG, DL, 128);
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert = insertSubVector(UndefVec, Ext, 0, TLO.DAG, DL, 128);
return TLO.CombineTo(Op, Insert);
}
}
break;
}
case X86ISD::VPERM2X128: {
// Simplify VPERM2F128/VPERM2I128 to extract_subvector.
SDLoc DL(Op);
unsigned LoMask = Op.getConstantOperandVal(2) & 0xF;
if (LoMask & 0x8)
return TLO.CombineTo(
Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, DL));
unsigned EltIdx = (LoMask & 0x1) * (NumElts / 2);
unsigned SrcIdx = (LoMask & 0x2) >> 1;
SDValue ExtOp =
extractSubVector(Op.getOperand(SrcIdx), EltIdx, TLO.DAG, DL, 128);
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
// Zero upper elements.
case X86ISD::VZEXT_MOVL:
// Target unary shuffles by immediate:
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
case X86ISD::VPERMILPI:
// (Non-Lane Crossing) Target Shuffles.
case X86ISD::VPERMILPV:
case X86ISD::VPERMIL2:
case X86ISD::PSHUFB:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::BLENDI:
// Integer ops.
case X86ISD::PACKSS:
case X86ISD::PACKUS:
// Horizontal Ops.
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
case X86ISD::FHSUB: {
SDLoc DL(Op);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
SDValue SrcOp = Op.getOperand(i);
EVT SrcVT = SrcOp.getValueType();
assert((!SrcVT.isVector() || SrcVT.getSizeInBits() == SizeInBits) &&
"Unsupported vector size");
Ops.push_back(SrcVT.isVector() ? extractSubVector(SrcOp, 0, TLO.DAG, DL,
ExtSizeInBits)
: SrcOp);
}
MVT ExtVT = VT.getSimpleVT();
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
ExtSizeInBits / ExtVT.getScalarSizeInBits());
SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops);
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
}
}
// For splats, unless we *only* demand the 0'th element, stop attempts at
// simplification here; we aren't going to improve things, and the splat is
// better than any potential shuffle.
if (!DemandedElts.isOne() && TLO.DAG.isSplatValue(Op, /*AllowUndefs*/false))
return false;
// Get target/faux shuffle mask.
APInt OpUndef, OpZero;
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
if (!getTargetShuffleInputs(Op, DemandedElts, OpInputs, OpMask, OpUndef,
OpZero, TLO.DAG, Depth, false))
return false;
// Shuffle inputs must be the same size as the result.
if (OpMask.size() != (unsigned)NumElts ||
llvm::any_of(OpInputs, [VT](SDValue V) {
return VT.getSizeInBits() != V.getValueSizeInBits() ||
!V.getValueType().isVector();
}))
return false;
KnownZero = OpZero;
KnownUndef = OpUndef;
// Check if shuffle mask can be simplified to undef/zero/identity.
int NumSrcs = OpInputs.size();
for (int i = 0; i != NumElts; ++i)
if (!DemandedElts[i])
OpMask[i] = SM_SentinelUndef;
if (isUndefInRange(OpMask, 0, NumElts)) {
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
if (isUndefOrZeroInRange(OpMask, 0, NumElts)) {
KnownZero.setAllBits();
return TLO.CombineTo(
Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
}
for (int Src = 0; Src != NumSrcs; ++Src)
if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts))
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src]));
// Attempt to simplify inputs.
for (int Src = 0; Src != NumSrcs; ++Src) {
// TODO: Support inputs of different types.
if (OpInputs[Src].getValueType() != VT)
continue;
int Lo = Src * NumElts;
APInt SrcElts = APInt::getZero(NumElts);
for (int i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
int M = OpMask[i] - Lo;
if (0 <= M && M < NumElts)
SrcElts.setBit(M);
}
// TODO - Propagate input undef/zero elts.
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
}
// If we don't demand all elements, then attempt to combine to a simpler
// shuffle.
// We need to convert the depth to something combineX86ShufflesRecursively
// can handle - so pretend it's Depth == 0 again, and reduce the max depth
// to match. This prevents combineX86ShuffleChain from returning a
// combined shuffle that's the same as the original root, causing an
// infinite loop.
if (!DemandedElts.isAllOnes()) {
assert(Depth < X86::MaxShuffleCombineDepth && "Depth out of range");
SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i)
if (DemandedElts[i])
DemandedMask[i] = i;
SDValue NewShuffle = combineX86ShufflesRecursively(
{Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,
/*HasVarMask*/ false,
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG,
Subtarget);
if (NewShuffle)
return TLO.CombineTo(Op, NewShuffle);
}
return false;
}
bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth) const {
EVT VT = Op.getValueType();
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
unsigned Opc = Op.getOpcode();
switch(Opc) {
case X86ISD::VTRUNC: {
KnownBits KnownOp;
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
// Simplify the input, using demanded bit information.
APInt TruncMask = OriginalDemandedBits.zext(SrcVT.getScalarSizeInBits());
APInt DemandedElts = OriginalDemandedElts.trunc(SrcVT.getVectorNumElements());
if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO, Depth + 1))
return true;
break;
}
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ: {
// PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
KnownBits KnownLHS, KnownRHS;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
// Don't mask bits on 32-bit AVX512 targets which might lose a broadcast.
// FIXME: Can we bound this better?
APInt DemandedMask = APInt::getLowBitsSet(64, 32);
APInt DemandedMaskLHS = APInt::getAllOnes(64);
APInt DemandedMaskRHS = APInt::getAllOnes(64);
bool Is32BitAVX512 = !Subtarget.is64Bit() && Subtarget.hasAVX512();
if (!Is32BitAVX512 || !TLO.DAG.isSplatValue(LHS))
DemandedMaskLHS = DemandedMask;
if (!Is32BitAVX512 || !TLO.DAG.isSplatValue(RHS))
DemandedMaskRHS = DemandedMask;
if (SimplifyDemandedBits(LHS, DemandedMaskLHS, OriginalDemandedElts,
KnownLHS, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(RHS, DemandedMaskRHS, OriginalDemandedElts,
KnownRHS, TLO, Depth + 1))
return true;
// PMULUDQ(X,1) -> AND(X,(1<<32)-1) 'getZeroExtendInReg'.
KnownRHS = KnownRHS.trunc(32);
if (Opc == X86ISD::PMULUDQ && KnownRHS.isConstant() &&
KnownRHS.getConstant().isOne()) {
SDLoc DL(Op);
SDValue Mask = TLO.DAG.getConstant(DemandedMask, DL, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, DL, VT, LHS, Mask));
}
// Aggressively peek through ops to get at the demanded low bits.
SDValue DemandedLHS = SimplifyMultipleUseDemandedBits(
LHS, DemandedMaskLHS, OriginalDemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedRHS = SimplifyMultipleUseDemandedBits(
RHS, DemandedMaskRHS, OriginalDemandedElts, TLO.DAG, Depth + 1);
if (DemandedLHS || DemandedRHS) {
DemandedLHS = DemandedLHS ? DemandedLHS : LHS;
DemandedRHS = DemandedRHS ? DemandedRHS : RHS;
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS));
}
break;
}
case X86ISD::VSHLI: {
SDValue Op0 = Op.getOperand(0);
unsigned ShAmt = Op.getConstantOperandVal(1);
if (ShAmt >= BitWidth)
break;
APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
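// Illustrative example: with C1 == 3 and ShAmt == 5,
// (vshli (vsrli X, 3), 5) becomes (vshli X, 2) whenever the demanded bits
// have at least 5 trailing zeros.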
if (Op0.getOpcode() == X86ISD::VSRLI &&
OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
unsigned Shift2Amt = Op0.getConstantOperandVal(1);
if (Shift2Amt < BitWidth) {
int Diff = ShAmt - Shift2Amt;
if (Diff == 0)
return TLO.CombineTo(Op, Op0.getOperand(0));
unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
SDValue NewShift = TLO.DAG.getNode(
NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
return TLO.CombineTo(Op, NewShift);
}
}
// If we are only demanding sign bits then we can use the shift source directly.
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1);
unsigned UpperDemandedBits =
BitWidth - OriginalDemandedBits.countTrailingZeros();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
return TLO.CombineTo(Op, Op0);
if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits known zero.
Known.Zero.setLowBits(ShAmt);
return false;
}
case X86ISD::VSRLI: {
unsigned ShAmt = Op.getConstantOperandVal(1);
if (ShAmt >= BitWidth)
break;
APInt DemandedMask = OriginalDemandedBits << ShAmt;
if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
OriginalDemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
return false;
}
case X86ISD::VSRAI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
unsigned ShAmt = cast<ConstantSDNode>(Op1)->getZExtValue();
if (ShAmt >= BitWidth)
break;
APInt DemandedMask = OriginalDemandedBits << ShAmt;
// If we just want the sign bit then we don't need to shift it.
if (OriginalDemandedBits.isSignMask())
return TLO.CombineTo(Op, Op0);
// fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
if (Op0.getOpcode() == X86ISD::VSHLI &&
Op.getOperand(1) == Op0.getOperand(1)) {
SDValue Op00 = Op0.getOperand(0);
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts);
if (ShAmt < NumSignBits)
return TLO.CombineTo(Op, Op00);
}
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
if (OriginalDemandedBits.countLeadingZeros() < ShAmt)
DemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
OriginalDemandedBits.countLeadingZeros() >= ShAmt)
return TLO.CombineTo(
Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1));
// High bits are known one.
if (Known.One[BitWidth - ShAmt - 1])
Known.One.setHighBits(ShAmt);
return false;
}
case X86ISD::BLENDV: {
SDValue Sel = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
APInt SignMask = APInt::getSignMask(BitWidth);
SDValue NewSel = SimplifyMultipleUseDemandedBits(
Sel, SignMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
SDValue NewLHS = SimplifyMultipleUseDemandedBits(
LHS, OriginalDemandedBits, OriginalDemandedElts, TLO.DAG, Depth + 1);
SDValue NewRHS = SimplifyMultipleUseDemandedBits(
RHS, OriginalDemandedBits, OriginalDemandedElts, TLO.DAG, Depth + 1);
if (NewSel || NewLHS || NewRHS) {
NewSel = NewSel ? NewSel : Sel;
NewLHS = NewLHS ? NewLHS : LHS;
NewRHS = NewRHS ? NewRHS : RHS;
return TLO.CombineTo(Op, TLO.DAG.getNode(X86ISD::BLENDV, SDLoc(Op), VT,
NewSel, NewLHS, NewRHS));
}
break;
}
case X86ISD::PEXTRB:
case X86ISD::PEXTRW: {
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
MVT VecVT = Vec.getSimpleValueType();
unsigned NumVecElts = VecVT.getVectorNumElements();
if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) {
unsigned Idx = CIdx->getZExtValue();
unsigned VecBitWidth = VecVT.getScalarSizeInBits();
// If we demand no bits from the vector then we must have demanded
// bits from the implicit zext - simplify to zero.
APInt DemandedVecBits = OriginalDemandedBits.trunc(VecBitWidth);
if (DemandedVecBits == 0)
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
APInt KnownUndef, KnownZero;
APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx);
if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts,
KnownVec, TLO, Depth + 1))
return true;
if (SDValue V = SimplifyMultipleUseDemandedBits(
Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1)));
Known = KnownVec.zext(BitWidth);
return false;
}
break;
}
case X86ISD::PINSRB:
case X86ISD::PINSRW: {
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
MVT VecVT = Vec.getSimpleValueType();
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
unsigned Idx = CIdx->getZExtValue();
if (!OriginalDemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
KnownBits KnownVec;
APInt DemandedVecElts(OriginalDemandedElts);
DemandedVecElts.clearBit(Idx);
if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts,
KnownVec, TLO, Depth + 1))
return true;
KnownBits KnownScl;
unsigned NumSclBits = Scl.getScalarValueSizeInBits();
APInt DemandedSclBits = OriginalDemandedBits.zext(NumSclBits);
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
KnownScl = KnownScl.trunc(VecVT.getScalarSizeInBits());
Known = KnownBits::commonBits(KnownVec, KnownScl);
return false;
}
break;
}
case X86ISD::PACKSS:
// PACKSS saturates to MIN/MAX integer values. So if we just want the
// sign bit then we can ask for the source operands' sign bit.
// TODO - add known bits handling.
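// Illustrative example: in (movmsk (packss X, Y)) only the sign bit of each
// packed element is used, and signed saturation preserves the sign, so it
// is enough to demand the sign bits of X and Y directly.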
if (OriginalDemandedBits.isSignMask()) {
APInt DemandedLHS, DemandedRHS;
getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS);
KnownBits KnownLHS, KnownRHS;
APInt SignMask = APInt::getSignMask(BitWidth * 2);
if (SimplifyDemandedBits(Op.getOperand(0), SignMask, DemandedLHS,
KnownLHS, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS,
KnownRHS, TLO, Depth + 1))
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op.getOperand(0), SignMask, DemandedLHS, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op.getOperand(1), SignMask, DemandedRHS, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
SDValue Op0 = DemandedOp0 ? DemandedOp0 : Op.getOperand(0);
SDValue Op1 = DemandedOp1 ? DemandedOp1 : Op.getOperand(1);
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1));
}
}
// TODO - add general PACKSS/PACKUS SimplifyDemandedBits support.
break;
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
APInt DemandedElts = APInt::getOneBitSet(
SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1, 0);
if (SimplifyDemandedBits(Src, OriginalDemandedBits, DemandedElts, Known,
TLO, Depth + 1))
return true;
// If we don't need the upper bits, attempt to narrow the broadcast source.
// Don't attempt this on AVX512 as it might affect broadcast folding.
// TODO: Should we attempt this for i32/i16 splats? They tend to be slower.
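// Illustrative example: when only the low 32 bits of each lane are
// demanded, (v2i64 broadcast i64:X) can be rewritten as
// (bitcast (v4i32 broadcast (i32 trunc X))).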
if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() &&
OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2) &&
Src->hasOneUse()) {
MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2);
SDValue NewSrc =
TLO.DAG.getNode(ISD::TRUNCATE, SDLoc(Src), NewSrcVT, Src);
MVT NewVT = MVT::getVectorVT(NewSrcVT, VT.getVectorNumElements() * 2);
SDValue NewBcst =
TLO.DAG.getNode(X86ISD::VBROADCAST, SDLoc(Op), NewVT, NewSrc);
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, NewBcst));
}
break;
}
case X86ISD::PCMPGT:
// icmp sgt(0, R) == ashr(R, BitWidth-1).
// iff we only need the sign bit then we can use R directly.
if (OriginalDemandedBits.isSignMask() &&
ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
return TLO.CombineTo(Op, Op.getOperand(1));
break;
case X86ISD::MOVMSK: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
unsigned SrcBits = SrcVT.getScalarSizeInBits();
unsigned NumElts = SrcVT.getVectorNumElements();
// If we don't need the sign bits at all just return zero.
if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
// See if we only demand bits from the lower 128-bit vector.
if (SrcVT.is256BitVector() &&
OriginalDemandedBits.getActiveBits() <= (NumElts / 2)) {
SDValue NewSrc = extract128BitVector(Src, 0, TLO.DAG, SDLoc(Src));
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
}
// Only demand the vector elements of the sign bits we need.
APInt KnownUndef, KnownZero;
APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
TLO, Depth + 1))
return true;
Known.Zero = KnownZero.zext(BitWidth);
Known.Zero.setHighBits(BitWidth - NumElts);
// MOVMSK only uses the MSB from each vector element.
KnownBits KnownSrc;
APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
Depth + 1))
return true;
if (KnownSrc.One[SrcBits - 1])
Known.One.setLowBits(NumElts);
else if (KnownSrc.Zero[SrcBits - 1])
Known.Zero.setLowBits(NumElts);
// Attempt to avoid a multi-use op if we don't need anything from it.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
return false;
}
case X86ISD::BEXTR:
case X86ISD::BEXTRI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Only bottom 16-bits of the control bits are required.
if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
// NOTE: SimplifyDemandedBits won't do this for constants.
uint64_t Val1 = Cst1->getZExtValue();
uint64_t MaskedVal1 = Val1 & 0xFFFF;
if (Opc == X86ISD::BEXTR && MaskedVal1 != Val1) {
SDLoc DL(Op);
return TLO.CombineTo(
Op, TLO.DAG.getNode(X86ISD::BEXTR, DL, VT, Op0,
TLO.DAG.getConstant(MaskedVal1, DL, VT)));
}
unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
// If the length is 0, the result is 0.
if (Length == 0) {
Known.setAllZero();
return false;
}
if ((Shift + Length) <= BitWidth) {
APInt DemandedMask = APInt::getBitsSet(BitWidth, Shift, Shift + Length);
if (SimplifyDemandedBits(Op0, DemandedMask, Known, TLO, Depth + 1))
return true;
Known = Known.extractBits(Length, Shift);
Known = Known.zextOrTrunc(BitWidth);
return false;
}
} else {
assert(Opc == X86ISD::BEXTR && "Unexpected opcode!");
KnownBits Known1;
APInt DemandedMask(APInt::getLowBitsSet(BitWidth, 16));
if (SimplifyDemandedBits(Op1, DemandedMask, Known1, TLO, Depth + 1))
return true;
// If the length is 0, replace with 0.
KnownBits LengthBits = Known1.extractBits(8, 8);
if (LengthBits.isZero())
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
}
break;
}
case X86ISD::PDEP: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
unsigned DemandedBitsLZ = OriginalDemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
// If the demanded bits have leading zeroes, we don't demand those from the
// mask.
if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1))
return true;
// The number of possible 1s in the mask determines the number of LSBs of
// operand 0 used. Undemanded bits from the mask don't matter so filter
// them before counting.
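// Illustrative example: PDEP deposits the low popcount(mask) bits of the
// source into the set positions of the mask, so a mask known to be of the
// form 0b0101 can only consume the low 2 bits of operand 0.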
KnownBits Known2;
uint64_t Count = (~Known.Zero & LoMask).countPopulation();
APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count));
if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1))
return true;
// Zeroes are retained from the mask, but not ones.
Known.One.clearAllBits();
// The result will have at least as many trailing zeros as the non-mask
// operand since bits can only map to the same or higher bit position.
Known.Zero.setLowBits(Known2.countMinTrailingZeros());
return false;
}
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
int NumElts = DemandedElts.getBitWidth();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
switch (Opc) {
case X86ISD::PINSRB:
case X86ISD::PINSRW: {
// If we don't demand the inserted element, return the base vector.
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
MVT VecVT = Vec.getSimpleValueType();
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
!DemandedElts[CIdx->getZExtValue()])
return Vec;
break;
}
case X86ISD::VSHLI: {
// If we are only demanding sign bits then we can use the shift source
// directly.
SDValue Op0 = Op.getOperand(0);
unsigned ShAmt = Op.getConstantOperandVal(1);
unsigned BitWidth = DemandedBits.getBitWidth();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
return Op0;
break;
}
case X86ISD::VSRAI:
// iff we only need the sign bit then we can use the source directly.
// TODO: generalize where we only demand extended signbits.
if (DemandedBits.isSignMask())
return Op.getOperand(0);
break;
case X86ISD::PCMPGT:
// icmp sgt(0, R) == ashr(R, BitWidth-1).
// iff we only need the sign bit then we can use R directly.
if (DemandedBits.isSignMask() &&
ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
return Op.getOperand(1);
break;
case X86ISD::ANDNP: {
// ANDNP = (~LHS & RHS);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
KnownBits LHSKnown = DAG.computeKnownBits(LHS, DemandedElts, Depth + 1);
KnownBits RHSKnown = DAG.computeKnownBits(RHS, DemandedElts, Depth + 1);
// If all of the demanded bits are known 0 on LHS and known 0 on RHS, then
// the (inverted) LHS bits cannot contribute to the result of the 'andn' in
// this context, so return RHS.
if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero))
return RHS;
break;
}
}
APInt ShuffleUndef, ShuffleZero;
SmallVector<int, 16> ShuffleMask;
SmallVector<SDValue, 2> ShuffleOps;
if (getTargetShuffleInputs(Op, DemandedElts, ShuffleOps, ShuffleMask,
ShuffleUndef, ShuffleZero, DAG, Depth, false)) {
// If all the demanded elts are from one operand and are inline,
// then we can use the operand directly.
int NumOps = ShuffleOps.size();
if (ShuffleMask.size() == (unsigned)NumElts &&
llvm::all_of(ShuffleOps, [VT](SDValue V) {
return VT.getSizeInBits() == V.getValueSizeInBits();
})) {
if (DemandedElts.isSubsetOf(ShuffleUndef))
return DAG.getUNDEF(VT);
if (DemandedElts.isSubsetOf(ShuffleUndef | ShuffleZero))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op));
// Bitmask that indicates which ops have only been accessed 'inline'.
APInt IdentityOp = APInt::getAllOnes(NumOps);
for (int i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (!DemandedElts[i] || ShuffleUndef[i])
continue;
int OpIdx = M / NumElts;
int EltIdx = M % NumElts;
if (M < 0 || EltIdx != i) {
IdentityOp.clearAllBits();
break;
}
IdentityOp &= APInt::getOneBitSet(NumOps, OpIdx);
if (IdentityOp == 0)
break;
}
assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) &&
"Multiple identity shuffles detected");
if (IdentityOp != 0)
return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countTrailingZeros()]);
}
}
return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth);
}
bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
bool PoisonOnly, unsigned Depth) const {
unsigned EltsBits = Op.getScalarValueSizeInBits();
unsigned NumElts = DemandedElts.getBitWidth();
// TODO: Add more target shuffles.
switch (Op.getOpcode()) {
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI: {
SmallVector<int, 8> Mask;
DecodePSHUFMask(NumElts, EltsBits, Op.getConstantOperandVal(1), Mask);
APInt DemandedSrcElts = APInt::getZero(NumElts);
for (unsigned I = 0; I != NumElts; ++I)
if (DemandedElts[I])
DemandedSrcElts.setBit(Mask[I]);
return DAG.isGuaranteedNotToBeUndefOrPoison(
Op.getOperand(0), DemandedSrcElts, PoisonOnly, Depth + 1);
}
}
return TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG, PoisonOnly, Depth);
}
bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
// TODO: Add more target shuffles.
switch (Op.getOpcode()) {
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
return false;
}
return TargetLowering::canCreateUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
}
bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
const APInt &DemandedElts,
APInt &UndefElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned NumElts = DemandedElts.getBitWidth();
unsigned Opc = Op.getOpcode();
switch (Opc) {
case X86ISD::VBROADCAST:
case X86ISD::VBROADCAST_LOAD:
UndefElts = APInt::getNullValue(NumElts);
return true;
}
return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts,
DAG, Depth);
}
// Helper to peek through bitops/trunc/setcc to determine size of source vector.
// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
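// For example, checkBitcastSrcVectorSize returns true for
// (v4i1 setcc (v4i64 A), (v4i64 B)) with Size == 256, since the compared
// operands are 256 bits wide.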
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
bool AllowTruncate) {
switch (Src.getOpcode()) {
case ISD::TRUNCATE:
if (!AllowTruncate)
return false;
[[fallthrough]];
case ISD::SETCC:
return Src.getOperand(0).getValueSizeInBits() == Size;
case ISD::AND:
case ISD::XOR:
case ISD::OR:
return checkBitcastSrcVectorSize(Src.getOperand(0), Size, AllowTruncate) &&
checkBitcastSrcVectorSize(Src.getOperand(1), Size, AllowTruncate);
case ISD::VSELECT:
return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
checkBitcastSrcVectorSize(Src.getOperand(1), Size, AllowTruncate) &&
checkBitcastSrcVectorSize(Src.getOperand(2), Size, AllowTruncate);
case ISD::BUILD_VECTOR:
return ISD::isBuildVectorAllZeros(Src.getNode());
}
return false;
}
// Helper to flip between AND/OR/XOR opcodes and their X86ISD FP equivalents.
static unsigned getAltBitOpcode(unsigned Opcode) {
switch(Opcode) {
case ISD::AND: return X86ISD::FAND;
case ISD::OR: return X86ISD::FOR;
case ISD::XOR: return X86ISD::FXOR;
case X86ISD::ANDNP: return X86ISD::FANDN;
}
llvm_unreachable("Unknown bitwise opcode");
}
// Helper to adjust v4i32 MOVMSK expansion to work with SSE1-only targets.
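// For example, (v4i1 setlt (v4i32 bitcast (v4f32 X)), zero) only tests the
// per-lane sign bits, so on SSE1 the same mask can be produced directly
// with MOVMSKPS on X.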
static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src,
const SDLoc &DL) {
EVT SrcVT = Src.getValueType();
if (SrcVT != MVT::v4i1)
return SDValue();
switch (Src.getOpcode()) {
case ISD::SETCC:
if (Src.getOperand(0).getValueType() == MVT::v4i32 &&
ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT) {
SDValue Op0 = Src.getOperand(0);
if (ISD::isNormalLoad(Op0.getNode()))
return DAG.getBitcast(MVT::v4f32, Op0);
if (Op0.getOpcode() == ISD::BITCAST &&
Op0.getOperand(0).getValueType() == MVT::v4f32)
return Op0.getOperand(0);
}
break;
case ISD::AND:
case ISD::XOR:
case ISD::OR: {
SDValue Op0 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(0), DL);
SDValue Op1 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(1), DL);
if (Op0 && Op1)
return DAG.getNode(getAltBitOpcode(Src.getOpcode()), DL, MVT::v4f32, Op0,
Op1);
break;
}
}
return SDValue();
}
// Helper to push sign extension of vXi1 SETCC result through bitops.
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
SDValue Src, const SDLoc &DL) {
switch (Src.getOpcode()) {
case ISD::SETCC:
case ISD::TRUNCATE:
case ISD::BUILD_VECTOR:
return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
case ISD::AND:
case ISD::XOR:
case ISD::OR:
return DAG.getNode(
Src.getOpcode(), DL, SExtVT,
signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
case ISD::VSELECT:
return DAG.getSelect(
DL, SExtVT, Src.getOperand(0),
signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
}
llvm_unreachable("Unexpected node type for vXi1 sign extension");
}
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
// ->
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the illegal vector is scalarized on subtargets that don't have legal
// vxi1 types.
static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
const SDLoc &DL,
const X86Subtarget &Subtarget) {
EVT SrcVT = Src.getValueType();
if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
return SDValue();
// Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
// legalization destroys the v4i32 type.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2()) {
if (SDValue V = adjustBitcastSrcVectorSSE1(DAG, Src, DL)) {
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32,
DAG.getBitcast(MVT::v4f32, V));
return DAG.getZExtOrTrunc(V, DL, VT);
}
}
// If the input is a truncate from v16i8 or v32i8 go ahead and use a
// movmskb even with avx512. This will be better than truncating to vXi1 and
// using a kmov. This can especially help KNL if the input is a v16i8/v32i8
// vpcmpeqb/vpcmpgtb.
bool PreferMovMsk = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
(Src.getOperand(0).getValueType() == MVT::v16i8 ||
Src.getOperand(0).getValueType() == MVT::v32i8 ||
Src.getOperand(0).getValueType() == MVT::v64i8);
// Prefer movmsk for AVX512 for (bitcast (setlt X, 0)) which can be handled
// directly with vpmovmskb/vmovmskps/vmovmskpd.
if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT &&
ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
EVT CmpVT = Src.getOperand(0).getValueType();
EVT EltVT = CmpVT.getVectorElementType();
if (CmpVT.getSizeInBits() <= 256 &&
(EltVT == MVT::i8 || EltVT == MVT::i32 || EltVT == MVT::i64))
PreferMovMsk = true;
}
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !PreferMovMsk))
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
// v8f64. So all legal 128-bit and 256-bit vectors are covered except for
// v8i16 and v16i16.
// For these two cases, we can shuffle the upper element bytes to a
// consecutive sequence at the start of the vector and treat the results as
// v16i8 or v32i8, and for the v8i16 case (as v16i8) this is the preferable
// solution. However, for v16i16 this is not the case: the shuffle is
// expensive, so we
// avoid sign-extending to this type entirely.
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
bool PropagateSExt = false;
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
return SDValue();
case MVT::v2i1:
SExtVT = MVT::v2i64;
break;
case MVT::v4i1:
SExtVT = MVT::v4i32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
if (Subtarget.hasAVX() &&
checkBitcastSrcVectorSize(Src, 256, Subtarget.hasAVX2())) {
SExtVT = MVT::v4i64;
PropagateSExt = true;
}
break;
case MVT::v8i1:
SExtVT = MVT::v8i16;
// For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)),
// sign-extend to a 256-bit operation to match the compare.
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
// 256-bit because the shuffle is cheaper than sign extending the result of
// the compare.
if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256, true) ||
checkBitcastSrcVectorSize(Src, 512, true))) {
SExtVT = MVT::v8i32;
PropagateSExt = true;
}
break;
case MVT::v16i1:
SExtVT = MVT::v16i8;
// For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)),
// it is not profitable to sign-extend to 256-bit because this will
// require an extra cross-lane shuffle which is more expensive than
// truncating the result of the compare to 128-bits.
break;
case MVT::v32i1:
SExtVT = MVT::v32i8;
break;
case MVT::v64i1:
// If we have AVX512F, but not AVX512BW, and the input is truncated from
// v64i8 (checked earlier), then split the input and make two pmovmskbs.
if (Subtarget.hasAVX512()) {
if (Subtarget.hasBWI())
return SDValue();
SExtVT = MVT::v64i8;
break;
}
// Split if this is a <64 x i8> comparison result.
if (checkBitcastSrcVectorSize(Src, 512, false)) {
SExtVT = MVT::v64i8;
break;
}
return SDValue();
};
SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
: DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
V = getPMOVMSKB(DL, V, DAG, Subtarget);
} else {
if (SExtVT == MVT::v8i16)
V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
DAG.getUNDEF(MVT::v8i16));
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
EVT IntVT =
EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
V = DAG.getZExtOrTrunc(V, DL, IntVT);
return DAG.getBitcast(VT, V);
}
// Convert a vXi1 constant build vector to the same width scalar integer.
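// For example, (v4i1 <1,0,1,1>) becomes the i4 constant 0b1101 (element 0
// maps to bit 0).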
static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) {
EVT SrcVT = Op.getValueType();
assert(SrcVT.getVectorElementType() == MVT::i1 &&
"Expected a vXi1 vector");
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
"Expected a constant build vector");
APInt Imm(SrcVT.getVectorNumElements(), 0);
for (unsigned Idx = 0, e = Op.getNumOperands(); Idx < e; ++Idx) {
SDValue In = Op.getOperand(Idx);
if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1))
Imm.setBit(Idx);
}
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Imm.getBitWidth());
return DAG.getConstant(Imm, SDLoc(Op), IntVT);
}
static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::BITCAST && "Expected a bitcast");
if (!DCI.isBeforeLegalizeOps())
return SDValue();
// Only do this if we have k-registers.
if (!Subtarget.hasAVX512())
return SDValue();
EVT DstVT = N->getValueType(0);
SDValue Op = N->getOperand(0);
EVT SrcVT = Op.getValueType();
if (!Op.hasOneUse())
return SDValue();
// Look for logic ops.
if (Op.getOpcode() != ISD::AND &&
Op.getOpcode() != ISD::OR &&
Op.getOpcode() != ISD::XOR)
return SDValue();
// Make sure we have a bitcast between mask registers and a scalar type.
if (!(SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
DstVT.isScalarInteger()) &&
!(DstVT.isVector() && DstVT.getVectorElementType() == MVT::i1 &&
SrcVT.isScalarInteger()))
return SDValue();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
if (LHS.hasOneUse() && LHS.getOpcode() == ISD::BITCAST &&
LHS.getOperand(0).getValueType() == DstVT)
return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, LHS.getOperand(0),
DAG.getBitcast(DstVT, RHS));
if (RHS.hasOneUse() && RHS.getOpcode() == ISD::BITCAST &&
RHS.getOperand(0).getValueType() == DstVT)
return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
DAG.getBitcast(DstVT, LHS), RHS.getOperand(0));
// If the RHS is a vXi1 build vector, this is a good reason to flip too.
// Most of these have to move a constant from the scalar domain anyway.
if (ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) {
RHS = combinevXi1ConstantToInteger(RHS, DAG);
return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
DAG.getBitcast(DstVT, LHS), RHS);
}
return SDValue();
}
static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(BV);
unsigned NumElts = BV->getNumOperands();
SDValue Splat = BV->getSplatValue();
// Build MMX element from integer GPR or SSE float values.
auto CreateMMXElement = [&](SDValue V) {
if (V.isUndef())
return DAG.getUNDEF(MVT::x86mmx);
if (V.getValueType().isFloatingPoint()) {
if (Subtarget.hasSSE1() && !isa<ConstantFPSDNode>(V)) {
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, V);
V = DAG.getBitcast(MVT::v2i64, V);
return DAG.getNode(X86ISD::MOVDQ2Q, DL, MVT::x86mmx, V);
}
V = DAG.getBitcast(MVT::i32, V);
} else {
V = DAG.getAnyExtOrTrunc(V, DL, MVT::i32);
}
return DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, V);
};
// Convert build vector ops to MMX data in the bottom elements.
SmallVector<SDValue, 8> Ops;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Broadcast - use (PUNPCKL+)PSHUFW to broadcast single element.
if (Splat) {
if (Splat.isUndef())
return DAG.getUNDEF(MVT::x86mmx);
Splat = CreateMMXElement(Splat);
if (Subtarget.hasSSE1()) {
// Unpack v8i8 to splat i8 elements to lowest 16-bits.
if (NumElts == 8)
Splat = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
DAG.getTargetConstant(Intrinsic::x86_mmx_punpcklbw, DL,
TLI.getPointerTy(DAG.getDataLayout())),
Splat, Splat);
// Use PSHUFW to repeat 16-bit elements.
unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL,
TLI.getPointerTy(DAG.getDataLayout())),
Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8));
}
Ops.append(NumElts, Splat);
} else {
for (unsigned i = 0; i != NumElts; ++i)
Ops.push_back(CreateMMXElement(BV->getOperand(i)));
}
// Use tree of PUNPCKLs to build up general MMX vector.
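// For example, four i16 elements are combined pairwise with punpcklwd and
// the two results are then merged with a single punpckldq.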
while (Ops.size() > 1) {
unsigned NumOps = Ops.size();
unsigned IntrinOp =
(NumOps == 2 ? Intrinsic::x86_mmx_punpckldq
: (NumOps == 4 ? Intrinsic::x86_mmx_punpcklwd
: Intrinsic::x86_mmx_punpcklbw));
SDValue Intrin = DAG.getTargetConstant(
IntrinOp, DL, TLI.getPointerTy(DAG.getDataLayout()));
for (unsigned i = 0; i != NumOps; i += 2)
Ops[i / 2] = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, Intrin,
Ops[i], Ops[i + 1]);
Ops.resize(NumOps / 2);
}
return Ops[0];
}
// Recursive function that attempts to determine whether a bool vector node
// was originally a vector/float/double that got truncated/extended/bitcast
// to/from a scalar integer. If so, replace the scalar ops with bool vector
// equivalents back down the chain.
static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned Opc = V.getOpcode();
switch (Opc) {
case ISD::BITCAST: {
// Bitcast from a vector/float/double, we can cheaply bitcast to VT.
SDValue Src = V.getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT.isVector() || SrcVT.isFloatingPoint())
return DAG.getBitcast(VT, Src);
break;
}
case ISD::TRUNCATE: {
// If we find a suitable source, a truncated scalar becomes a subvector.
SDValue Src = V.getOperand(0);
EVT NewSrcVT =
EVT::getVectorVT(*DAG.getContext(), MVT::i1, Src.getValueSizeInBits());
if (TLI.isTypeLegal(NewSrcVT))
if (SDValue N0 =
combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N0,
DAG.getIntPtrConstant(0, DL));
break;
}
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: {
// If we find a suitable source, an extended scalar becomes a subvector.
SDValue Src = V.getOperand(0);
EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Src.getScalarValueSizeInBits());
if (TLI.isTypeLegal(NewSrcVT))
if (SDValue N0 =
combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget))
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
Opc == ISD::ANY_EXTEND ? DAG.getUNDEF(VT)
: DAG.getConstant(0, DL, VT),
N0, DAG.getIntPtrConstant(0, DL));
break;
}
case ISD::OR: {
// If we find suitable sources, we can just move an OR to the vector domain.
SDValue Src0 = V.getOperand(0);
SDValue Src1 = V.getOperand(1);
if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
if (SDValue N1 = combineBitcastToBoolVector(VT, Src1, DL, DAG, Subtarget))
return DAG.getNode(Opc, DL, VT, N0, N1);
break;
}
case ISD::SHL: {
// If we find a suitable source, a SHL becomes a KSHIFTL.
SDValue Src0 = V.getOperand(0);
if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) ||
((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI()))
break;
if (auto *Amt = dyn_cast<ConstantSDNode>(V.getOperand(1)))
if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
return DAG.getNode(
X86ISD::KSHIFTL, DL, VT, N0,
DAG.getTargetConstant(Amt->getZExtValue(), DL, MVT::i8));
break;
}
}
return SDValue();
}
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
// ->
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the setcc result is scalarized on subtargets that don't have legal
// vxi1 types.
if (DCI.isBeforeLegalize()) {
SDLoc dl(N);
if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))
return V;
// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
Subtarget.hasAVX512()) {
N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
N0 = DAG.getBitcast(MVT::v8i1, N0);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
DAG.getIntPtrConstant(0, dl));
}
// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
Subtarget.hasAVX512()) {
// Use zeros for the widening if we already have some zeroes. This can
// allow SimplifyDemandedBits to remove scalar ANDs that may be
// downstream of this.
// FIXME: It might make sense to detect a concat_vectors with a mix of
// zeroes and undef and turn it into insert_subvector for i1 vectors as
// a separate combine. What we can't do is canonicalize the operands of
// such a concat or we'll get into a loop with SimplifyDemandedBits.
if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1);
if (ISD::isBuildVectorAllZeros(LastOp.getNode())) {
SrcVT = LastOp.getValueType();
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end());
Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT));
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
N0 = DAG.getBitcast(MVT::i8, N0);
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
}
}
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
Ops[0] = N0;
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
N0 = DAG.getBitcast(MVT::i8, N0);
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
}
} else {
// If we're bitcasting from iX to vXi1, see if the integer originally
// began as a vXi1 and whether we can remove the bitcast entirely.
if (VT.isVector() && VT.getScalarType() == MVT::i1 &&
SrcVT.isScalarInteger() && TLI.isTypeLegal(VT)) {
if (SDValue V =
combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget))
return V;
}
}
// Look for (i8 (bitcast (v8i1 (extract_subvector (v16i1 X), 0)))) and
// replace with (i8 (trunc (i16 (bitcast (v16i1 X))))). This can occur
// due to insert_subvector legalization on KNL. By promoting the copy to i16
// we can help with known bits propagation from the vXi1 domain to the
// scalar domain.
if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() &&
!Subtarget.hasDQI() && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N0.getOperand(0).getValueType() == MVT::v16i1 &&
isNullConstant(N0.getOperand(1)))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
DAG.getBitcast(MVT::i16, N0.getOperand(0)));
// Canonicalize (bitcast (vbroadcast_load)) so that the output of the bitcast
// and the vbroadcast_load are both integer or both fp. In some cases this
// will remove the bitcast entirely.
if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() &&
VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) {
auto *BCast = cast<MemIntrinsicSDNode>(N0);
unsigned SrcVTSize = SrcVT.getScalarSizeInBits();
unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits();
// Don't swap i8/i16 since we don't have fp types of that size.
if (MemSize >= 32) {
MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize)
: MVT::getIntegerVT(MemSize);
MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize)
: MVT::getIntegerVT(SrcVTSize);
LoadVT = MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements());
SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
MemVT, BCast->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1));
return DAG.getBitcast(VT, ResNode);
}
}
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
if (VT == MVT::x86mmx) {
// Detect MMX constant vectors.
APInt UndefElts;
SmallVector<APInt, 1> EltBits;
if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) {
SDLoc DL(N0);
// Handle zero-extension of i32 with MOVD.
if (EltBits[0].countLeadingZeros() >= 32)
return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32));
// Else, bitcast to a double.
// TODO - investigate supporting sext 32-bit immediates on x86_64.
APFloat F64(APFloat::IEEEdouble(), EltBits[0]);
return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64));
}
// Detect bitcasts to x86mmx low word.
if (N0.getOpcode() == ISD::BUILD_VECTOR &&
(SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) &&
N0.getOperand(0).getValueType() == SrcVT.getScalarType()) {
bool LowUndef = true, AllUndefOrZero = true;
for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N0.getOperand(i);
LowUndef &= Op.isUndef() || (i >= e/2);
AllUndefOrZero &= (Op.isUndef() || isNullConstant(Op));
}
if (AllUndefOrZero) {
SDValue N00 = N0.getOperand(0);
SDLoc dl(N00);
N00 = LowUndef ? DAG.getAnyExtOrTrunc(N00, dl, MVT::i32)
: DAG.getZExtOrTrunc(N00, dl, MVT::i32);
return DAG.getNode(X86ISD::MMX_MOVW2D, dl, VT, N00);
}
}
// Detect bitcasts of 64-bit build vectors and convert to a
// MMX UNPCK/PSHUFW which takes MMX type inputs with the value in the
// lowest element.
if (N0.getOpcode() == ISD::BUILD_VECTOR &&
(SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 ||
SrcVT == MVT::v8i8))
return createMMXBuildVector(cast<BuildVectorSDNode>(N0), DAG, Subtarget);
// Detect bitcasts between element or subvector extraction to x86mmx.
if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) &&
isNullConstant(N0.getOperand(1))) {
SDValue N00 = N0.getOperand(0);
if (N00.getValueType().is128BitVector())
return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT,
DAG.getBitcast(MVT::v2i64, N00));
}
// Detect bitcasts from FP_TO_SINT to x86mmx.
if (SrcVT == MVT::v2i32 && N0.getOpcode() == ISD::FP_TO_SINT) {
SDLoc DL(N0);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
DAG.getUNDEF(MVT::v2i32));
return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT,
DAG.getBitcast(MVT::v2i64, Res));
}
}
// Try to remove a bitcast of a constant vXi1 vector. We have to legalize
// most of these to scalar anyway.
if (Subtarget.hasAVX512() && VT.isScalarInteger() &&
SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
return combinevXi1ConstantToInteger(N0, DAG);
}
if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() &&
VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
isa<ConstantSDNode>(N0)) {
auto *C = cast<ConstantSDNode>(N0);
if (C->isAllOnes())
return DAG.getConstant(1, SDLoc(N0), VT);
if (C->isZero())
return DAG.getConstant(0, SDLoc(N0), VT);
}
// Look for MOVMSK that is maybe truncated and then bitcasted to vXi1.
// Turn it into a sign bit compare that produces a k-register. This avoids
// a trip through a GPR.
if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() &&
VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
isPowerOf2_32(VT.getVectorNumElements())) {
unsigned NumElts = VT.getVectorNumElements();
SDValue Src = N0;
// Peek through truncate.
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
Src = N0.getOperand(0);
if (Src.getOpcode() == X86ISD::MOVMSK && Src.hasOneUse()) {
SDValue MovmskIn = Src.getOperand(0);
MVT MovmskVT = MovmskIn.getSimpleValueType();
unsigned MovMskElts = MovmskVT.getVectorNumElements();
// We allow extra bits of the movmsk to be used since they are known zero.
// We can't convert a VPMOVMSKB without avx512bw.
if (MovMskElts <= NumElts &&
(Subtarget.hasBWI() || MovmskVT.getVectorElementType() != MVT::i8)) {
EVT IntVT = EVT(MovmskVT).changeVectorElementTypeToInteger();
MovmskIn = DAG.getBitcast(IntVT, MovmskIn);
SDLoc dl(N);
MVT CmpVT = MVT::getVectorVT(MVT::i1, MovMskElts);
SDValue Cmp = DAG.getSetCC(dl, CmpVT, MovmskIn,
DAG.getConstant(0, dl, IntVT), ISD::SETLT);
if (EVT(CmpVT) == VT)
return Cmp;
// Pad with zeroes up to original VT to replace the zeroes that were
// being used from the MOVMSK.
unsigned NumConcats = NumElts / MovMskElts;
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, CmpVT));
Ops[0] = Cmp;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Ops);
}
}
}
// Try to remove bitcasts from input and output of mask arithmetic to
// remove GPR<->K-register crossings.
if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
return V;
// Convert a bitcasted integer logic operation that has one bitcasted
// floating-point operand into a floating-point logic operation. This may
// create a load of a constant, but that is cheaper than materializing the
// constant in an integer register and transferring it to an SSE register or
// transferring the SSE operand to integer register and back.
unsigned FPOpcode;
switch (N0.getOpcode()) {
case ISD::AND: FPOpcode = X86ISD::FAND; break;
case ISD::OR: FPOpcode = X86ISD::FOR; break;
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
}
// Check if we have a bitcast from another integer type as well.
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64) ||
(Subtarget.hasFP16() && VT == MVT::f16) ||
(Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() &&
TLI.isTypeLegal(VT))))
return SDValue();
SDValue LogicOp0 = N0.getOperand(0);
SDValue LogicOp1 = N0.getOperand(1);
SDLoc DL0(N0);
// bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
LogicOp0.hasOneUse() && LogicOp0.getOperand(0).hasOneUse() &&
LogicOp0.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode();
return DAG.getNode(Opcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
}
// bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
LogicOp1.hasOneUse() && LogicOp1.getOperand(0).hasOneUse() &&
LogicOp1.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode();
return DAG.getNode(Opcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
}
return SDValue();
}
// (mul (zext a), (sext b))
static bool detectExtMul(SelectionDAG &DAG, const SDValue &Mul, SDValue &Op0,
SDValue &Op1) {
Op0 = Mul.getOperand(0);
Op1 = Mul.getOperand(1);
// Canonicalize so that Op1 is the sign-extended operand.
if (Op0.getOpcode() == ISD::SIGN_EXTEND)
std::swap(Op0, Op1);
auto IsFreeTruncation = [](SDValue &Op) -> bool {
if ((Op.getOpcode() == ISD::ZERO_EXTEND ||
Op.getOpcode() == ISD::SIGN_EXTEND) &&
Op.getOperand(0).getScalarValueSizeInBits() <= 8)
return true;
auto *BV = dyn_cast<BuildVectorSDNode>(Op);
return (BV && BV->isConstant());
};
// (dpbusd (zext a), (sext b)). Since the first operand should be an unsigned
// value, we need to check that Op0 is a zero-extended value. Op1 should be a
// signed value, so we just check its sign bits.
if ((IsFreeTruncation(Op0) &&
DAG.computeKnownBits(Op0).countMaxActiveBits() <= 8) &&
(IsFreeTruncation(Op1) && DAG.ComputeMaxSignificantBits(Op1) <= 8))
return true;
return false;
}
// Given an ABS node, detect the following pattern:
// (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))).
// This is useful as it is the input into a SAD pattern.
static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) {
SDValue AbsOp1 = Abs->getOperand(0);
if (AbsOp1.getOpcode() != ISD::SUB)
return false;
Op0 = AbsOp1.getOperand(0);
Op1 = AbsOp1.getOperand(1);
// Check if the operands of the sub are zero-extended from vectors of i8.
if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 ||
Op1.getOpcode() != ISD::ZERO_EXTEND ||
Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8)
return false;
return true;
}
static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS,
unsigned &LogBias, const SDLoc &DL,
const X86Subtarget &Subtarget) {
// Extend or truncate to MVT::i8 first.
MVT Vi8VT =
MVT::getVectorVT(MVT::i8, LHS.getValueType().getVectorElementCount());
LHS = DAG.getZExtOrTrunc(LHS, DL, Vi8VT);
RHS = DAG.getSExtOrTrunc(RHS, DL, Vi8VT);
// VPDPBUSD(<16 x i32>C, <16 x i8>A, <16 x i8>B). For each dst element
// C[0] = C[0] + A[0]B[0] + A[1]B[1] + A[2]B[2] + A[3]B[3].
// The src A, B element type is i8, but the dst C element type is i32.
// When we calculate the number of reduction stages we use the src vector
// type vXi8, so we need a log-bias of 2 to avoid 2 extra stages.
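// For example: reducing the v16i32 products of two v16i8 inputs would
// normally take Log2(16) = 4 shuffle+add stages, but VPDPBUSD already sums
// each group of 4 byte products into one i32 lane, covering Log2(4) = 2 of
// those stages, so only Stages - LogBias stages remain afterwards.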
LogBias = 2;
unsigned RegSize = std::max(128u, (unsigned)Vi8VT.getSizeInBits());
if (Subtarget.hasVNNI() && !Subtarget.hasVLX())
RegSize = std::max(512u, RegSize);
// "Zero-extend" the i8 vectors. This is not a per-element zext, rather we
// fill in the missing vector elements with 0.
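// e.g. a v4i8 input (32 bits) with RegSize == 128 gives NumConcat == 4:
//   (v16i8 (concat_vectors LHS, zero, zero, zero))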
unsigned NumConcat = RegSize / Vi8VT.getSizeInBits();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, Vi8VT));
Ops[0] = LHS;
MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8);
SDValue DpOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
Ops[0] = RHS;
SDValue DpOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
// Actually build the DotProduct, split as 256/512 bits for
// AVXVNNI/AVX512VNNI.
auto DpBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
return DAG.getNode(X86ISD::VPDPBUSD, DL, VT, Ops);
};
MVT DpVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
SDValue Zero = DAG.getConstant(0, DL, DpVT);
return SplitOpsAndApply(DAG, Subtarget, DL, DpVT, {Zero, DpOp0, DpOp1},
DpBuilder, false);
}
// Given two zexts of <k x i8> to <k x i32>, create a PSADBW of the inputs
// to these zexts.
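// For illustration: two v8i8 inputs zero-padded to v16i8 produce a v2i64
// PSADBW result where each 64-bit lane holds the sum of |a[i] - b[i]| over
// its 8 byte pairs.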
static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
const SDValue &Zext1, const SDLoc &DL,
const X86Subtarget &Subtarget) {
// Find the appropriate width for the PSADBW.
EVT InVT = Zext0.getOperand(0).getValueType();
unsigned RegSize = std::max(128u, (unsigned)InVT.getSizeInBits());
// "Zero-extend" the i8 vectors. This is not a per-element zext, rather we
// fill in the missing vector elements with 0.
unsigned NumConcat = RegSize / InVT.getSizeInBits();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT));
Ops[0] = Zext0.getOperand(0);
MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8);
SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
Ops[0] = Zext1.getOperand(0);
SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
// Actually build the SAD, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64);
return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops);
};
MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, { SadOp0, SadOp1 },
PSADBWBuilder);
}
// Attempt to replace a min/max v8i16/v16i8 horizontal reduction with
// PHMINPOSUW.
static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Bail without SSE41.
if (!Subtarget.hasSSE41())
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8)
return SDValue();
// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
ISD::NodeType BinOp;
SDValue Src = DAG.matchBinOpReduction(
Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true);
if (!Src)
return SDValue();
EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getScalarType();
if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0)
return SDValue();
SDLoc DL(Extract);
SDValue MinPos = Src;
// First, reduce the source down to 128-bit, applying BinOp to lo/hi.
while (SrcVT.getSizeInBits() > 128) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = splitVector(MinPos, DAG, DL);
SrcVT = Lo.getValueType();
MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
}
assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
(SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) &&
"Unexpected value type");
// PHMINPOSUW applies to UMIN(v8i16); for SMIN/SMAX/UMAX we must apply a mask
// to flip the values accordingly.
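// e.g. for SMAX(v8i16): (x ^ 0x7FFF) maps the largest signed value to the
// smallest unsigned value, so the UMIN performed by PHMINPOSUW locates the
// signed maximum, and XORing with the same mask afterwards recovers it.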
SDValue Mask;
unsigned MaskEltsBits = ExtractVT.getSizeInBits();
if (BinOp == ISD::SMAX)
Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::SMIN)
Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::UMAX)
Mask = DAG.getAllOnesConstant(DL, SrcVT);
if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
// For v16i8 cases we need to perform UMIN on pairs of byte elements,
// shuffling each upper element down and inserting zeros. This means that the
// v16i8 UMIN will leave the upper element as zero, performing zero-extension
// ready for the PHMINPOS.
if (ExtractVT == MVT::i8) {
SDValue Upper = DAG.getVectorShuffle(
SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8),
{1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
}
// Perform the PHMINPOS on a v8i16 vector.
MinPos = DAG.getBitcast(MVT::v8i16, MinPos);
MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos);
MinPos = DAG.getBitcast(SrcVT, MinPos);
if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, MinPos,
DAG.getIntPtrConstant(0, DL));
}
// Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK.
static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Bail without SSE2.
if (!Subtarget.hasSSE2())
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
unsigned BitWidth = ExtractVT.getSizeInBits();
if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
ExtractVT != MVT::i8 && ExtractVT != MVT::i1)
return SDValue();
// Check for OR(any_of)/AND(all_of)/XOR(parity) horizontal reduction patterns.
ISD::NodeType BinOp;
SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
if (!Match && ExtractVT == MVT::i1)
Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::XOR});
if (!Match)
return SDValue();
// EXTRACT_VECTOR_ELT can require implicit extension of the vector element
// which we can't support here for now.
if (Match.getScalarValueSizeInBits() != BitWidth)
return SDValue();
SDValue Movmsk;
SDLoc DL(Extract);
EVT MatchVT = Match.getValueType();
unsigned NumElts = MatchVT.getVectorNumElements();
unsigned MaxElts = Subtarget.hasInt256() ? 32 : 16;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (ExtractVT == MVT::i1) {
// Special case for (pre-legalization) vXi1 reductions.
if (NumElts > 64 || !isPowerOf2_32(NumElts))
return SDValue();
if (TLI.isTypeLegal(MatchVT)) {
// If this is a legal AVX512 predicate type then we can just bitcast.
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = DAG.getBitcast(MovmskVT, Match);
} else {
// For all_of(setcc(x,y,eq)) - use PMOVMSKB(PCMPEQB()).
if (BinOp == ISD::AND && Match.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
ISD::CondCode::SETEQ) {
EVT VecSVT = Match.getOperand(0).getValueType().getScalarType();
if (VecSVT != MVT::i8 && (VecSVT.getSizeInBits() % 8) == 0) {
NumElts *= VecSVT.getSizeInBits() / 8;
EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, NumElts);
MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
Match = DAG.getSetCC(
DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
DAG.getBitcast(CmpVT, Match.getOperand(1)), ISD::CondCode::SETEQ);
}
}
// Use combineBitcastvxi1 to create the MOVMSK.
while (NumElts > MaxElts) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
NumElts /= 2;
}
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget);
}
if (!Movmsk)
return SDValue();
Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32);
} else {
// FIXME: Better handling of k-registers or 512-bit vectors?
unsigned MatchSizeInBits = Match.getValueSizeInBits();
if (!(MatchSizeInBits == 128 ||
(MatchSizeInBits == 256 && Subtarget.hasAVX())))
return SDValue();
// Make sure this isn't a vector of 1 element. The perf win from using
// MOVMSK diminishes with fewer elements in the reduction, but it is
// generally better to get the comparison over to the GPRs as soon as
// possible to reduce the number of vector ops.
if (Match.getValueType().getVectorNumElements() < 2)
return SDValue();
// Check that we are extracting a reduction of all sign bits.
if (DAG.ComputeNumSignBits(Match) != BitWidth)
return SDValue();
if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
MatchSizeInBits = Match.getValueSizeInBits();
}
// For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
MVT MaskSrcVT;
if (64 == BitWidth || 32 == BitWidth)
MaskSrcVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
MatchSizeInBits / BitWidth);
else
MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget);
NumElts = MaskSrcVT.getVectorNumElements();
}
assert((NumElts <= 32 || NumElts == 64) &&
"Not expecting more than 64 elements");
MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32;
if (BinOp == ISD::XOR) {
// parity -> (PARITY(MOVMSK X))
SDValue Result = DAG.getNode(ISD::PARITY, DL, CmpVT, Movmsk);
return DAG.getZExtOrTrunc(Result, DL, ExtractVT);
}
SDValue CmpC;
ISD::CondCode CondCode;
if (BinOp == ISD::OR) {
// any_of -> MOVMSK != 0
CmpC = DAG.getConstant(0, DL, CmpVT);
CondCode = ISD::CondCode::SETNE;
} else {
// all_of -> MOVMSK == ((1 << NumElts) - 1)
CmpC = DAG.getConstant(APInt::getLowBitsSet(CmpVT.getSizeInBits(), NumElts),
DL, CmpVT);
CondCode = ISD::CondCode::SETEQ;
}
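// e.g. with NumElts == 4 the all_of test is MOVMSK == 0b1111, while any_of
// only needs MOVMSK != 0.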
// The setcc produces an i8 of 0/1, so extend that to the result width and
// negate to get the final 0/-1 mask value.
EVT SetccVT =
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext);
}
static SDValue combineVPDPBUSDPattern(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasVNNI() && !Subtarget.hasAVXVNNI())
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
// Verify the type we're extracting is i32, as the output element type of
// vpdpbusd is i32.
if (ExtractVT != MVT::i32)
return SDValue();
EVT VT = Extract->getOperand(0).getValueType();
if (!isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
// Match shuffle + add pyramid.
ISD::NodeType BinOp;
SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});
// We can't combine to vpdpbusd for zext, because each of the 4 multiplies
// done by vpdpbusd computes a signed 16-bit product that will be sign extended
// before adding into the accumulator.
// TODO:
// We also need to verify that the multiply has at least 2x the number of bits
// of the input. We shouldn't match
// (sign_extend (mul (vXi9 (zext (vXi8 X))), (vXi9 (zext (vXi8 Y))))).
// if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND))
// Root = Root.getOperand(0);
// If there was a match, we want Root to be a mul.
if (!Root || Root.getOpcode() != ISD::MUL)
return SDValue();
// Check whether we have an extend and mul pattern
SDValue LHS, RHS;
if (!detectExtMul(DAG, Root, LHS, RHS))
return SDValue();
// Create the dot product instruction.
SDLoc DL(Extract);
unsigned StageBias;
SDValue DP = createVPDPBUSD(DAG, LHS, RHS, StageBias, DL, Subtarget);
// If the original vector was wider than 4 elements, sum over the results
// in the DP vector.
unsigned Stages = Log2_32(VT.getVectorNumElements());
EVT DpVT = DP.getValueType();
if (Stages > StageBias) {
unsigned DpElems = DpVT.getVectorNumElements();
for (unsigned i = Stages - StageBias; i > 0; --i) {
SmallVector<int, 16> Mask(DpElems, -1);
for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
Mask[j] = MaskEnd + j;
SDValue Shuffle =
DAG.getVectorShuffle(DpVT, DL, DP, DAG.getUNDEF(DpVT), Mask);
DP = DAG.getNode(ISD::ADD, DL, DpVT, DP, Shuffle);
}
}
// Return the lowest ExtractSizeInBits bits.
EVT ResVT =
EVT::getVectorVT(*DAG.getContext(), ExtractVT,
DpVT.getSizeInBits() / ExtractVT.getSizeInBits());
DP = DAG.getBitcast(ResVT, DP);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, DP,
Extract->getOperand(1));
}
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// PSADBW is only supported on SSE2 and up.
if (!Subtarget.hasSSE2())
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
// Verify the type we're extracting is either i32 or i64.
// FIXME: Could support other types, but this is what we have coverage for.
if (ExtractVT != MVT::i32 && ExtractVT != MVT::i64)
return SDValue();
EVT VT = Extract->getOperand(0).getValueType();
if (!isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
// Match shuffle + add pyramid.
ISD::NodeType BinOp;
SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});
// The operand is expected to be zero extended from i8
// (verified in detectZextAbsDiff).
// In order to convert to i64 and above, additional any/zero/sign
// extend is expected.
// The zero extend from 32 bit has no mathematical effect on the result.
// Also, the sign extend is effectively a zero extend here
// (it extends the sign bit, which is zero).
// So it is correct to skip the sign/zero extend instruction.
if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
Root.getOpcode() == ISD::ZERO_EXTEND ||
Root.getOpcode() == ISD::ANY_EXTEND))
Root = Root.getOperand(0);
// If there was a match, we want Root to be an ABS node that is the root of
// an abs-diff pattern.
if (!Root || Root.getOpcode() != ISD::ABS)
return SDValue();
// Check whether we have an abs-diff pattern feeding into the ABS.
SDValue Zext0, Zext1;
if (!detectZextAbsDiff(Root, Zext0, Zext1))
return SDValue();
// Create the SAD instruction.
SDLoc DL(Extract);
SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL, Subtarget);
// If the original vector was wider than 8 elements, sum over the results
// in the SAD vector.
unsigned Stages = Log2_32(VT.getVectorNumElements());
EVT SadVT = SAD.getValueType();
if (Stages > 3) {
unsigned SadElems = SadVT.getVectorNumElements();
for(unsigned i = Stages - 3; i > 0; --i) {
SmallVector<int, 16> Mask(SadElems, -1);
for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
Mask[j] = MaskEnd + j;
SDValue Shuffle =
DAG.getVectorShuffle(SadVT, DL, SAD, DAG.getUNDEF(SadVT), Mask);
SAD = DAG.getNode(ISD::ADD, DL, SadVT, SAD, Shuffle);
}
}
unsigned ExtractSizeInBits = ExtractVT.getSizeInBits();
// Return the lowest ExtractSizeInBits bits.
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), ExtractVT,
SadVT.getSizeInBits() / ExtractSizeInBits);
SAD = DAG.getBitcast(ResVT, SAD);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, SAD,
Extract->getOperand(1));
}
// Attempt to peek through a target shuffle and extract the scalar from the
// source.
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDLoc dl(N);
SDValue Src = N->getOperand(0);
SDValue Idx = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getVectorElementType();
unsigned SrcEltBits = SrcSVT.getSizeInBits();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
// Don't attempt this for boolean mask vectors or unknown extraction indices.
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
return SDValue();
const APInt &IdxC = N->getConstantOperandAPInt(1);
if (IdxC.uge(NumSrcElts))
return SDValue();
SDValue SrcBC = peekThroughBitcasts(Src);
// Handle extract(bitcast(broadcast(scalar_value))).
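// e.g. (i32 (extract_elt (v4i32 (bitcast (vbroadcast (i64 X)))), 2)):
// Scale = 64 / 32 = 2 and 2 % 2 == 0, so element 2 is just the low 32 bits
// of X and truncating the scalar suffices.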
if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
SDValue SrcOp = SrcBC.getOperand(0);
EVT SrcOpVT = SrcOp.getValueType();
if (SrcOpVT.isScalarInteger() && VT.isInteger() &&
(SrcOpVT.getSizeInBits() % SrcEltBits) == 0) {
unsigned Scale = SrcOpVT.getSizeInBits() / SrcEltBits;
unsigned Offset = IdxC.urem(Scale) * SrcEltBits;
// TODO support non-zero offsets.
if (Offset == 0) {
SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType());
SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);
return SrcOp;
}
}
}
// If we're extracting a single element from a broadcast load and there are
// no other users, just create a single load.
if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) {
auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {
SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
MemIntr->getBasePtr(),
MemIntr->getPointerInfo(),
MemIntr->getOriginalAlign(),
MemIntr->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
return Load;
}
}
// Handle extract(bitcast(scalar_to_vector(scalar_value))) for integers.
// TODO: Move to DAGCombine?
if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
SrcBC.getValueType().isInteger() &&
(SrcBC.getScalarValueSizeInBits() % SrcEltBits) == 0 &&
SrcBC.getScalarValueSizeInBits() ==
SrcBC.getOperand(0).getValueSizeInBits()) {
unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcEltBits;
if (IdxC.ult(Scale)) {
unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits();
SDValue Scl = SrcBC.getOperand(0);
EVT SclVT = Scl.getValueType();
if (Offset) {
Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl,
DAG.getShiftAmountConstant(Offset, SclVT, dl));
}
Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType());
Scl = DAG.getZExtOrTrunc(Scl, dl, VT);
return Scl;
}
}
// Handle extract(truncate(x)) for the 0th index.
// TODO: Treat this as a faux shuffle?
// TODO: When can we use this for general indices?
if (ISD::TRUNCATE == Src.getOpcode() && IdxC == 0 &&
(SrcVT.getSizeInBits() % 128) == 0) {
Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl);
MVT ExtractVT = MVT::getVectorVT(SrcSVT.getSimpleVT(), 128 / SrcEltBits);
return DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(ExtractVT, Src),
Idx);
}
// We can only legally extract other elements from 128-bit vectors, and only
// in certain circumstances depending on the SSE level.
// TODO: Investigate float/double extraction if it will be just stored.
auto GetLegalExtract = [&Subtarget, &DAG, &dl](SDValue Vec, EVT VecVT,
unsigned Idx) {
EVT VecSVT = VecVT.getScalarType();
if ((VecVT.is256BitVector() || VecVT.is512BitVector()) &&
(VecSVT == MVT::i8 || VecSVT == MVT::i16 || VecSVT == MVT::i32 ||
VecSVT == MVT::i64)) {
unsigned EltSizeInBits = VecSVT.getSizeInBits();
unsigned NumEltsPerLane = 128 / EltSizeInBits;
unsigned LaneOffset = (Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits;
unsigned LaneIdx = LaneOffset / Vec.getScalarValueSizeInBits();
VecVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumEltsPerLane);
Vec = extract128BitVector(Vec, LaneIdx, DAG, dl);
Idx &= (NumEltsPerLane - 1);
}
if ((VecVT == MVT::v4i32 || VecVT == MVT::v2i64) &&
((Idx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VecVT.getScalarType(),
DAG.getBitcast(VecVT, Vec),
DAG.getIntPtrConstant(Idx, dl));
}
if ((VecVT == MVT::v8i16 && Subtarget.hasSSE2()) ||
(VecVT == MVT::v16i8 && Subtarget.hasSSE41())) {
unsigned OpCode = (VecVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
return DAG.getNode(OpCode, dl, MVT::i32, DAG.getBitcast(VecVT, Vec),
DAG.getTargetConstant(Idx, dl, MVT::i8));
}
return SDValue();
};
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG))
return SDValue();
// Shuffle inputs must be the same size as the result.
if (llvm::any_of(Ops, [SrcVT](SDValue Op) {
return SrcVT.getSizeInBits() != Op.getValueSizeInBits();
}))
return SDValue();
// Attempt to narrow/widen the shuffle mask to the correct size.
if (Mask.size() != NumSrcElts) {
if ((NumSrcElts % Mask.size()) == 0) {
SmallVector<int, 16> ScaledMask;
int Scale = NumSrcElts / Mask.size();
narrowShuffleMaskElts(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
// Simplify Mask based on demanded element.
int ExtractIdx = (int)IdxC.getZExtValue();
int Scale = Mask.size() / NumSrcElts;
int Lo = Scale * ExtractIdx;
int Hi = Scale * (ExtractIdx + 1);
for (int i = 0, e = (int)Mask.size(); i != e; ++i)
if (i < Lo || Hi <= i)
Mask[i] = SM_SentinelUndef;
SmallVector<int, 16> WidenedMask;
while (Mask.size() > NumSrcElts &&
canWidenShuffleElements(Mask, WidenedMask))
Mask = std::move(WidenedMask);
}
}
// If narrowing/widening failed, see if we can extract+zero-extend.
int ExtractIdx;
EVT ExtractVT;
if (Mask.size() == NumSrcElts) {
ExtractIdx = Mask[IdxC.getZExtValue()];
ExtractVT = SrcVT;
} else {
unsigned Scale = Mask.size() / NumSrcElts;
if ((Mask.size() % NumSrcElts) != 0 || SrcVT.isFloatingPoint())
return SDValue();
unsigned ScaledIdx = Scale * IdxC.getZExtValue();
if (!isUndefOrZeroInRange(Mask, ScaledIdx + 1, Scale - 1))
return SDValue();
ExtractIdx = Mask[ScaledIdx];
EVT ExtractSVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltBits / Scale);
ExtractVT = EVT::getVectorVT(*DAG.getContext(), ExtractSVT, Mask.size());
assert(SrcVT.getSizeInBits() == ExtractVT.getSizeInBits() &&
"Failed to widen vector type");
}
// If the shuffle source element is undef/zero then we can just accept it.
if (ExtractIdx == SM_SentinelUndef)
return DAG.getUNDEF(VT);
if (ExtractIdx == SM_SentinelZero)
return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT)
: DAG.getConstant(0, dl, VT);
SDValue SrcOp = Ops[ExtractIdx / Mask.size()];
ExtractIdx = ExtractIdx % Mask.size();
if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx))
return DAG.getZExtOrTrunc(V, dl, VT);
return SDValue();
}
/// Extracting a scalar FP value from vector element 0 is free, so extract each
/// operand first, then perform the math as a scalar op.
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
EVT VT = ExtElt->getValueType(0);
EVT VecVT = Vec.getValueType();
// TODO: If this is a unary/expensive/expand op, allow extraction from a
// non-zero element because the shuffle+scalar op will be cheaper?
if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT)
return SDValue();
// Vector FP compares don't fit the pattern of FP math ops (propagate, not
// extract, the condition code), so deal with those as a special-case.
if (Vec.getOpcode() == ISD::SETCC && VT == MVT::i1) {
EVT OpVT = Vec.getOperand(0).getValueType().getScalarType();
if (OpVT != MVT::f32 && OpVT != MVT::f64)
return SDValue();
// extract (setcc X, Y, CC), 0 --> setcc (extract X, 0), (extract Y, 0), CC
SDLoc DL(ExtElt);
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
Vec.getOperand(0), Index);
SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
Vec.getOperand(1), Index);
return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2));
}
if (!(VT == MVT::f16 && Subtarget.hasFP16()) && VT != MVT::f32 &&
VT != MVT::f64)
return SDValue();
// Vector FP selects don't fit the pattern of FP math ops (because the
// condition has a different type and we have to change the opcode), so deal
// with those here.
// FIXME: This is restricted to pre type legalization by ensuring the setcc
// has i1 elements. If we loosen this we need to convert vector bool to a
// scalar bool.
if (Vec.getOpcode() == ISD::VSELECT &&
Vec.getOperand(0).getOpcode() == ISD::SETCC &&
Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
// ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
SDLoc DL(ExtElt);
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
Vec.getOperand(0).getValueType().getScalarType(),
Vec.getOperand(0), Index);
SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
Vec.getOperand(1), Index);
SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
Vec.getOperand(2), Index);
return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
}
// TODO: This switch could include FNEG and the x86-specific FP logic ops
// (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
// missed load folding and fma+fneg combining.
switch (Vec.getOpcode()) {
case ISD::FMA: // Begin 3 operands
case ISD::FMAD:
case ISD::FADD: // Begin 2 operands
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
case ISD::FCOPYSIGN:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
case X86ISD::FMAX:
case X86ISD::FMIN:
case ISD::FABS: // Begin 1 operand
case ISD::FSQRT:
case ISD::FRINT:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
case ISD::FROUND:
case ISD::FFLOOR:
case X86ISD::FRCP:
case X86ISD::FRSQRT: {
// extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...
SDLoc DL(ExtElt);
SmallVector<SDValue, 4> ExtOps;
for (SDValue Op : Vec->ops())
ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index));
return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps);
}
default:
return SDValue();
}
llvm_unreachable("All opcodes should return within switch");
}
/// Try to convert a vector reduction sequence composed of binops and shuffles
/// into horizontal ops.
static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");
// We need at least SSE2 to do anything here.
if (!Subtarget.hasSSE2())
return SDValue();
ISD::NodeType Opc;
SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc,
{ISD::ADD, ISD::MUL, ISD::FADD}, true);
if (!Rdx)
return SDValue();
SDValue Index = ExtElt->getOperand(1);
assert(isNullConstant(Index) &&
"Reduction doesn't end in an extract from index 0");
EVT VT = ExtElt->getValueType(0);
EVT VecVT = Rdx.getValueType();
if (VecVT.getScalarType() != VT)
return SDValue();
SDLoc DL(ExtElt);
unsigned NumElts = VecVT.getVectorNumElements();
unsigned EltSizeInBits = VecVT.getScalarSizeInBits();
// Widen a v4i8/v8i8 vector to v16i8, filling the upper 64 bits with zeros or undef.
auto WidenToV16I8 = [&](SDValue V, bool ZeroExtend) {
if (V.getValueType() == MVT::v4i8) {
if (ZeroExtend && Subtarget.hasSSE41()) {
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
DAG.getConstant(0, DL, MVT::v4i32),
DAG.getBitcast(MVT::i32, V),
DAG.getIntPtrConstant(0, DL));
return DAG.getBitcast(MVT::v16i8, V);
}
V = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, V,
ZeroExtend ? DAG.getConstant(0, DL, MVT::v4i8)
: DAG.getUNDEF(MVT::v4i8));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V,
DAG.getUNDEF(MVT::v8i8));
};
// vXi8 mul reduction - promote to vXi16 mul reduction.
if (Opc == ISD::MUL) {
if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))
return SDValue();
if (VecVT.getSizeInBits() >= 128) {
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts / 2);
SDValue Lo = getUnpackl(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
SDValue Hi = getUnpackh(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
Lo = DAG.getBitcast(WideVT, Lo);
Hi = DAG.getBitcast(WideVT, Hi);
Rdx = DAG.getNode(Opc, DL, WideVT, Lo, Hi);
while (Rdx.getValueSizeInBits() > 128) {
std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
Rdx = DAG.getNode(Opc, DL, Lo.getValueType(), Lo, Hi);
}
} else {
Rdx = WidenToV16I8(Rdx, false);
Rdx = getUnpackl(DAG, DL, MVT::v16i8, Rdx, DAG.getUNDEF(MVT::v16i8));
Rdx = DAG.getBitcast(MVT::v8i16, Rdx);
}
if (NumElts >= 8)
Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
{4, 5, 6, 7, -1, -1, -1, -1}));
Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
{2, 3, -1, -1, -1, -1, -1, -1}));
Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
{1, -1, -1, -1, -1, -1, -1, -1}));
Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
// vXi8 add reduction - sub 128-bit vector.
if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
Rdx = WidenToV16I8(Rdx, true);
Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
DAG.getConstant(0, DL, MVT::v16i8));
Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
// Must be a >=128-bit vector with pow2 elements.
if ((VecVT.getSizeInBits() % 128) != 0 || !isPowerOf2_32(NumElts))
return SDValue();
// vXi8 add reduction - sum lo/hi halves then use PSADBW.
if (VT == MVT::i8) {
while (Rdx.getValueSizeInBits() > 128) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
VecVT = Lo.getValueType();
Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi);
}
assert(VecVT == MVT::v16i8 && "v16i8 reduction expected");
SDValue Hi = DAG.getVectorShuffle(
MVT::v16i8, DL, Rdx, Rdx,
{8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
Rdx = DAG.getNode(ISD::ADD, DL, MVT::v16i8, Rdx, Hi);
Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
// See if we can use vXi8 PSADBW add reduction for larger zext types.
// If the source vector values are 0-255, then we can use PSADBW to
// sum+zext v8i8 subvectors to vXi64, then perform the reduction.
// TODO: See if it's worth avoiding vXi16/i32 truncations?
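// For illustration: a v8i32 reduction whose values are known to fit in a
// byte is truncated to v8i8, zero-widened to v16i8, and PSADBW against zero
// then sums each group of 8 bytes into a single i64 lane.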
if (Opc == ISD::ADD && NumElts >= 4 && EltSizeInBits >= 16 &&
DAG.computeKnownBits(Rdx).getMaxValue().ule(255) &&
(EltSizeInBits == 16 || Rdx.getOpcode() == ISD::ZERO_EXTEND ||
Subtarget.hasAVX512())) {
EVT ByteVT = VecVT.changeVectorElementType(MVT::i8);
Rdx = DAG.getNode(ISD::TRUNCATE, DL, ByteVT, Rdx);
if (ByteVT.getSizeInBits() < 128)
Rdx = WidenToV16I8(Rdx, true);
// Build the PSADBW, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64);
SDValue Zero = DAG.getConstant(0, DL, Ops[0].getValueType());
return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops[0], Zero);
};
MVT SadVT = MVT::getVectorVT(MVT::i64, Rdx.getValueSizeInBits() / 64);
Rdx = SplitOpsAndApply(DAG, Subtarget, DL, SadVT, {Rdx}, PSADBWBuilder);
// TODO: We could truncate to vXi16/vXi32 before performing the reduction.
while (Rdx.getValueSizeInBits() > 128) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
VecVT = Lo.getValueType();
Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi);
}
assert(Rdx.getValueType() == MVT::v2i64 && "v2i64 reduction expected");
if (NumElts > 8) {
SDValue RdxHi = DAG.getVectorShuffle(MVT::v2i64, DL, Rdx, Rdx, {1, -1});
Rdx = DAG.getNode(ISD::ADD, DL, MVT::v2i64, Rdx, RdxHi);
}
VecVT = MVT::getVectorVT(VT.getSimpleVT(), 128 / VT.getSizeInBits());
Rdx = DAG.getBitcast(VecVT, Rdx);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
// Only use (F)HADD opcodes if they aren't microcoded or if it minimizes codesize.
if (!shouldUseHorizontalOp(true, DAG, Subtarget))
return SDValue();
unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;
// 256-bit horizontal instructions operate on 128-bit chunks rather than
// across the whole vector, so we need an extract + hop preliminary stage.
// This is the only step where the operands of the hop are not the same value.
// TODO: We could extend this to handle 512-bit or even longer vectors.
if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.hasSSSE3()) ||
((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.hasSSE3())) {
unsigned NumElts = VecVT.getVectorNumElements();
SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
Rdx = DAG.getNode(HorizOpcode, DL, Lo.getValueType(), Hi, Lo);
VecVT = Rdx.getValueType();
}
if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) &&
!((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3()))
return SDValue();
// extract (add (shuf X), X), 0 --> extract (hadd X, X), 0
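// e.g. for v4f32 [a,b,c,d]: hadd(x,x) = [a+b, c+d, a+b, c+d], and a second
// hadd leaves the full sum a+b+c+d in element 0.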
unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements());
for (unsigned i = 0; i != ReductionSteps; ++i)
Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
/// scalars back, while for x64 we should use 64-bit extracts and shifts.
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
return NewOp;
SDValue InputVector = N->getOperand(0);
SDValue EltIdx = N->getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx);
EVT SrcVT = InputVector.getValueType();
EVT VT = N->getValueType(0);
SDLoc dl(InputVector);
bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned NumEltBits = VT.getScalarSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))
return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
// Integer Constant Folding.
if (CIdx && VT.isInteger()) {
APInt UndefVecElts;
SmallVector<APInt, 16> EltBits;
unsigned VecEltBitWidth = SrcVT.getScalarSizeInBits();
if (getTargetConstantBitsFromNode(InputVector, VecEltBitWidth, UndefVecElts,
EltBits, true, false)) {
uint64_t Idx = CIdx->getZExtValue();
if (UndefVecElts[Idx])
return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
return DAG.getConstant(EltBits[Idx].zext(NumEltBits), dl, VT);
}
// Convert extract_element(bitcast(<X x i1>)) -> bitcast(extract_subvector()).
// Improves lowering of bool masks in Rust, which splits them into a byte array.
if (InputVector.getOpcode() == ISD::BITCAST && (NumEltBits % 8) == 0) {
SDValue Src = peekThroughBitcasts(InputVector);
if (Src.getValueType().getScalarType() == MVT::i1 &&
TLI.isTypeLegal(Src.getValueType())) {
MVT SubVT = MVT::getVectorVT(MVT::i1, NumEltBits);
SDValue Sub = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Src,
DAG.getIntPtrConstant(CIdx->getZExtValue() * NumEltBits, dl));
return DAG.getBitcast(VT, Sub);
}
}
}
if (IsPextr) {
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits),
DCI))
return SDValue(N, 0);
// PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling).
if ((InputVector.getOpcode() == X86ISD::PINSRB ||
InputVector.getOpcode() == X86ISD::PINSRW) &&
InputVector.getOperand(2) == EltIdx) {
assert(SrcVT == InputVector.getOperand(0).getValueType() &&
"Vector type mismatch");
SDValue Scl = InputVector.getOperand(1);
Scl = DAG.getNode(ISD::TRUNCATE, dl, SrcVT.getScalarType(), Scl);
return DAG.getZExtOrTrunc(Scl, dl, VT);
}
// TODO - Remove this once we can handle the implicit zero-extension of
// X86ISD::PEXTRW/X86ISD::PEXTRB in combinePredicateReduction and
// combineBasicSADPattern.
return SDValue();
}
// Detect mmx extraction of all bits as an i64. It works better as a bitcast.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
if (MMXSrc.getValueType() == MVT::x86mmx)
return DAG.getBitcast(VT, InputVector);
}
// Detect mmx to i32 conversion through a v2i32 elt extract.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
if (MMXSrc.getValueType() == MVT::x86mmx)
return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
}
// Check whether this extract is the root of a sum of absolute differences
// pattern. This has to be done here because we really want it to happen
// pre-legalization.
if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
return SAD;
if (SDValue VPDPBUSD = combineVPDPBUSDPattern(N, DAG, Subtarget))
return VPDPBUSD;
// Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
if (SDValue Cmp = combinePredicateReduction(N, DAG, Subtarget))
return Cmp;
// Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW.
if (SDValue MinMax = combineMinMaxReduction(N, DAG, Subtarget))
return MinMax;
// Attempt to optimize ADD/FADD/MUL reductions with HADD, promotion, etc.
if (SDValue V = combineArithReduction(N, DAG, Subtarget))
return V;
if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
return V;
// Attempt to extract a i1 element by using MOVMSK to extract the signbits
// and then testing the relevant element.
//
// Note that we only combine extracts on the *same* result number, i.e.
// t0 = merge_values a0, a1, a2, a3
// i1 = extract_vector_elt t0, Constant:i64<2>
// i1 = extract_vector_elt t0, Constant:i64<3>
// but not
// i1 = extract_vector_elt t0:1, Constant:i64<2>
// since the latter would need its own MOVMSK.
if (SrcVT.getScalarType() == MVT::i1) {
bool IsVar = !CIdx;
SmallVector<SDNode *, 16> BoolExtracts;
unsigned ResNo = InputVector.getResNo();
auto IsBoolExtract = [&BoolExtracts, &ResNo, &IsVar](SDNode *Use) {
if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Use->getOperand(0).getResNo() == ResNo &&
Use->getValueType(0) == MVT::i1) {
BoolExtracts.push_back(Use);
IsVar |= !isa<ConstantSDNode>(Use->getOperand(1));
return true;
}
return false;
};
// TODO: Can we drop the oneuse check for constant extracts?
if (all_of(InputVector->uses(), IsBoolExtract) &&
(IsVar || BoolExtracts.size() > 1)) {
EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
if (SDValue BC =
combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
for (SDNode *Use : BoolExtracts) {
// extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask
// Mask = 1 << MaskIdx
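// e.g. extracting element 2 of a v8i1 gives Mask = 1 << 2 = 4, and
// ((movmsk X) & 4) == 4 tests exactly bit 2 of the MOVMSK result.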
SDValue MaskIdx = DAG.getZExtOrTrunc(Use->getOperand(1), dl, MVT::i8);
SDValue MaskBit = DAG.getConstant(1, dl, BCVT);
SDValue Mask = DAG.getNode(ISD::SHL, dl, BCVT, MaskBit, MaskIdx);
SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
DCI.CombineTo(Use, Res);
}
return SDValue(N, 0);
}
}
}
// If this extract is from a loaded vector value and will be used as an
// integer, that requires a potentially expensive XMM -> GPR transfer.
// Additionally, if we can convert to a scalar integer load, that will likely
// be folded into a subsequent integer op.
// Note: Unlike the related fold for this in DAGCombiner, this is not limited
// to a single use of the loaded vector. For the reasons above, we
// expect this to be profitable even if it creates an extra load.
bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
return Use->getOpcode() == ISD::STORE ||
Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
});
auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
!LikelyUsedAsVector && LoadVec->isSimple()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue NewPtr =
TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8;
MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
SDValue Load =
DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
DAG.makeEquivalentMemoryOrdering(LoadVec, Load);
return Load;
}
return SDValue();
}
// Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)).
// This is more or less the reverse of combineBitcastvxi1.
static SDValue combineToExtendBoolVectorInReg(
unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) {
if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND &&
Opcode != ISD::ANY_EXTEND)
return SDValue();
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
return SDValue();
EVT SVT = VT.getScalarType();
EVT InSVT = N0.getValueType().getScalarType();
unsigned EltSizeInBits = SVT.getSizeInBits();
// Input type must be extending a bool vector (bit-casted from a scalar
// integer) to legal integer types.
if (!VT.isVector())
return SDValue();
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
return SDValue();
if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST)
return SDValue();
SDValue N00 = N0.getOperand(0);
EVT SclVT = N00.getValueType();
if (!SclVT.isScalarInteger())
return SDValue();
SDValue Vec;
SmallVector<int> ShuffleMask;
unsigned NumElts = VT.getVectorNumElements();
assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");
// Broadcast the scalar integer to the vector elements.
if (NumElts > EltSizeInBits) {
// If the scalar integer is greater than the vector element size, then we
// must split it down into sub-sections for broadcasting. For example:
// i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections.
// i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections.
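// e.g. for i16 -> v16i8 the shuffle mask is eight 0s followed by eight 1s:
// elements 0-7 read the low byte of the scalar and elements 8-15 the high
// byte, after which each element masks out bit (i % 8) below.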
assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
unsigned Scale = NumElts / EltSizeInBits;
EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
Vec = DAG.getBitcast(VT, Vec);
for (unsigned i = 0; i != Scale; ++i)
ShuffleMask.append(EltSizeInBits, i);
Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
} else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
(SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
// If we have register broadcast instructions, use the scalar size as the
// element type for the shuffle. Then cast to the wider element type. The
// widened bits won't be used, and this might allow the use of a broadcast
// load.
assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale");
unsigned Scale = EltSizeInBits / NumElts;
EVT BroadcastVT =
EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale);
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
ShuffleMask.append(NumElts * Scale, 0);
Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask);
Vec = DAG.getBitcast(VT, Vec);
} else {
// For smaller scalar integers, we can simply any-extend it to the vector
// element size (we don't care about the upper bits) and broadcast it to all
// elements.
SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
ShuffleMask.append(NumElts, 0);
Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
}
// Now, mask the relevant bit in each element.
SmallVector<SDValue, 32> Bits;
for (unsigned i = 0; i != NumElts; ++i) {
int BitIdx = (i % EltSizeInBits);
APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1);
Bits.push_back(DAG.getConstant(Bit, DL, SVT));
}
SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);
Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);
// Compare against the bitmask and extend the result.
EVT CCVT = VT.changeVectorElementType(MVT::i1);
Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ);
Vec = DAG.getSExtOrTrunc(Vec, DL, VT);
// For SEXT this is now done; otherwise shift the result down for
// zero-extension.
if (Opcode == ISD::SIGN_EXTEND)
return Vec;
return DAG.getNode(ISD::SRL, DL, VT, Vec,
DAG.getConstant(EltSizeInBits - 1, DL, VT));
}
/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
/// TODO: Move to DAGCombiner, possibly using TargetLowering::hasAndNot()?
static SDValue
combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
assert(CondVT.isVector() && "Vector select expects a vector selector!");
// TODO: Use isNullOrNullSplat() to distinguish constants with undefs?
// TODO: Can we assert that both operands are not zeros (because that should
// get simplified at node creation time)?
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
// If both inputs are 0/undef, create a complete zero vector.
// FIXME: As noted above this should be handled by DAGCombiner/getNode.
if (TValIsAllZeros && FValIsAllZeros) {
if (VT.isFloatingPoint())
return DAG.getConstantFP(0.0, DL, VT);
return DAG.getConstant(0, DL, VT);
}
// To use the condition operand as a bitwise mask, it must have elements that
// are the same size as the select elements. I.e., the condition operand must
// have already been promoted from the IR select condition type <N x i1>.
// Don't check if the types themselves are equal because that excludes
// vector floating-point selects.
if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
// Try to invert the condition if true value is not all 1s and false value is
// not all 0s. Only do this if the condition has one use.
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
if (!TValIsAllOnes && !FValIsAllZeros && Cond.hasOneUse() &&
// Check if the selector will be produced by CMPP*/PCMP*.
Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted.
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
CondVT) {
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
if (TValIsAllZeros || FValIsAllOnes) {
SDValue CC = Cond.getOperand(2);
ISD::CondCode NewCC = ISD::getSetCCInverse(
cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
NewCC);
std::swap(LHS, RHS);
TValIsAllOnes = FValIsAllOnes;
FValIsAllZeros = TValIsAllZeros;
}
}
// Cond value must be 'sign splat' to be converted to a logical op.
if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
return SDValue();
// vselect Cond, 111..., 000... -> Cond
if (TValIsAllOnes && FValIsAllZeros)
return DAG.getBitcast(VT, Cond);
if (!TLI.isTypeLegal(CondVT))
return SDValue();
// vselect Cond, 111..., X -> or Cond, X
if (TValIsAllOnes) {
SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
return DAG.getBitcast(VT, Or);
}
// vselect Cond, X, 000... -> and Cond, X
if (FValIsAllZeros) {
SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
return DAG.getBitcast(VT, And);
}
// vselect Cond, 000..., X -> andn Cond, X
if (TValIsAllZeros) {
SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
SDValue AndN;
// The canonical form differs for i1 vectors; X86ISD::ANDNP is not used.
if (CondVT.getScalarType() == MVT::i1)
AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
CastRHS);
else
AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
return DAG.getBitcast(VT, AndN);
}
return SDValue();
}
/// If both arms of a vector select are concatenated vectors, split the select,
/// and concatenate the result to eliminate a wide (256-bit) vector instruction:
/// vselect Cond, (concat T0, T1), (concat F0, F1) -->
/// concat (vselect (split Cond), T0, F0), (vselect (split Cond), T1, F1)
static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
if (Opcode != X86ISD::BLENDV && Opcode != ISD::VSELECT)
return SDValue();
// TODO: Split 512-bit vectors too?
EVT VT = N->getValueType(0);
if (!VT.is256BitVector())
return SDValue();
// TODO: Split as long as any 2 of the 3 operands are concatenated?
SDValue Cond = N->getOperand(0);
SDValue TVal = N->getOperand(1);
SDValue FVal = N->getOperand(2);
SmallVector<SDValue, 4> CatOpsT, CatOpsF;
if (!TVal.hasOneUse() || !FVal.hasOneUse() ||
!collectConcatOps(TVal.getNode(), CatOpsT, DAG) ||
!collectConcatOps(FVal.getNode(), CatOpsF, DAG))
return SDValue();
auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(Opcode, DL, Ops[1].getValueType(), Ops);
};
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { Cond, TVal, FVal },
makeBlend, /*CheckBWI*/ false);
}
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
SDLoc DL(N);
auto *TrueC = dyn_cast<ConstantSDNode>(LHS);
auto *FalseC = dyn_cast<ConstantSDNode>(RHS);
if (!TrueC || !FalseC)
return SDValue();
// Don't do this for crazy integer types.
EVT VT = N->getValueType(0);
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
// We're going to use the condition bit in math or logic ops. We could allow
// this with a wider condition value (post-legalization it becomes an i8),
// but if nothing is creating selects that late, it doesn't matter.
if (Cond.getValueType() != MVT::i1)
return SDValue();
// A power-of-2 multiply is just a shift. LEA also cheaply handles multiply by
// 3, 5, or 9 with i32/i64, so those get transformed too.
// TODO: For constants that overflow or do not differ by power-of-2 or small
// multiplier, convert to 'and' + 'add'.
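// e.g. select Cond, 5, 1 --> (zext(Cond) << 2) + 1: the difference 4 is a
// power of 2, so the multiply becomes a shift and the +1 can fold into LEA.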
const APInt &TrueVal = TrueC->getAPIntValue();
const APInt &FalseVal = FalseC->getAPIntValue();
// We have a more efficient lowering for "(X == 0) ? Y : -1" using SBB.
if ((TrueVal.isAllOnes() || FalseVal.isAllOnes()) &&
Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
if (CC == ISD::SETEQ || CC == ISD::SETNE)
return SDValue();
}
bool OV;
APInt Diff = TrueVal.ssub_ov(FalseVal, OV);
if (OV)
return SDValue();
APInt AbsDiff = Diff.abs();
if (AbsDiff.isPowerOf2() ||
((VT == MVT::i32 || VT == MVT::i64) &&
(AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) {
// We need a positive multiplier constant for shift/LEA codegen. The 'not'
// of the condition can usually be folded into a compare predicate, but even
// without that, the sequence should be cheaper than a CMOV alternative.
if (TrueVal.slt(FalseVal)) {
Cond = DAG.getNOT(DL, Cond, MVT::i1);
std::swap(TrueC, FalseC);
}
// select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC
SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
// Multiply condition by the difference if non-one.
if (!AbsDiff.isOne())
R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT));
// Add the base if non-zero.
if (!FalseC->isZero())
R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0));
return R;
}
return SDValue();
}
/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element.
/// This function will also call SimplifyDemandedBits on already created
/// BLENDV to perform additional simplifications.
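/// The BLENDV family (BLENDVPS/BLENDVPD/PBLENDVB) selects each element based
/// solely on the sign bit of the corresponding mask element, which is why
/// only the sign bit needs to be demanded from the condition.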
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
if ((N->getOpcode() != ISD::VSELECT &&
N->getOpcode() != X86ISD::BLENDV) ||
ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned BitWidth = Cond.getScalarValueSizeInBits();
EVT VT = N->getValueType(0);
// We can only handle the cases where VSELECT is directly legal on the
// subtarget. We custom lower VSELECT nodes with constant conditions and
// this makes it hard to see whether a dynamic VSELECT will correctly
// lower, so we both check the operation's status and explicitly handle the
// cases where a *dynamic* blend will fail even though a constant-condition
// blend could be custom lowered.
// FIXME: We should find a better way to handle this class of problems.
// Potentially, we should combine constant-condition vselect nodes
// pre-legalization into shuffles and not mark as many types as custom
// lowered.
if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
return SDValue();
// FIXME: We don't support i16-element blends currently. We could and
// should support them by making *all* the bits in the condition be set
// rather than just the high bit and using an i8-element blend.
if (VT.getVectorElementType() == MVT::i16)
return SDValue();
// Dynamic blending was only available from SSE4.1 onward.
if (VT.is128BitVector() && !Subtarget.hasSSE41())
return SDValue();
// 256-bit byte blends (v32i8) are only available with AVX2.
if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
return SDValue();
// There are no 512-bit blend instructions that use sign bits.
if (VT.is512BitVector())
return SDValue();
// Don't optimize before the condition has been transformed to a legal type
// and don't ever optimize vector selects that map to AVX512 mask-registers.
if (BitWidth < 8 || BitWidth > 64)
return SDValue();
auto OnlyUsedAsSelectCond = [](SDValue Cond) {
for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
UI != UE; ++UI)
if ((UI->getOpcode() != ISD::VSELECT &&
UI->getOpcode() != X86ISD::BLENDV) ||
UI.getOperandNo() != 0)
return false;
return true;
};
APInt DemandedBits(APInt::getSignMask(BitWidth));
if (OnlyUsedAsSelectCond(Cond)) {
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
if (!TLI.SimplifyDemandedBits(Cond, DemandedBits, Known, TLO, 0, true))
return SDValue();
// If we changed the computation somewhere in the DAG, this change will
// affect all users of Cond. Update all the nodes so that we do not use
// the generic VSELECT anymore. Otherwise, we may perform wrong
// optimizations as we messed with the actual expectation for the vector
// boolean values.
for (SDNode *U : Cond->uses()) {
if (U->getOpcode() == X86ISD::BLENDV)
continue;
SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
Cond, U->getOperand(1), U->getOperand(2));
DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
DCI.AddToWorklist(U);
}
DCI.CommitTargetLoweringOpt(TLO);
return SDValue(N, 0);
}
// Otherwise we can still at least try to simplify multiple use bits.
if (SDValue V = TLI.SimplifyMultipleUseDemandedBits(Cond, DemandedBits, DAG))
return DAG.getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), V,
N->getOperand(1), N->getOperand(2));
return SDValue();
}
// Try to match:
// (or (and (M, (sub 0, X)), (pandn M, X)))
// which is a special case of:
// (select M, (sub 0, X), X)
// Per:
// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
// We know that, if fNegate is 0 or 1:
// (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
//
// Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
// ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
// ( M ? -X : X) == ((X ^ M ) + (M & 1))
// This lets us transform our vselect to:
// (add (xor X, M), (and M, 1))
// And further to:
// (sub (xor X, M), M)
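// Sanity check of the identity: with M == -1 (negate),
// (sub (xor X, -1), -1) == ~X + 1 == -X; with M == 0 (keep),
// (sub (xor X, 0), 0) == X.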
static SDValue combineLogicBlendIntoConditionalNegate(
EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
EVT MaskVT = Mask.getValueType();
assert(MaskVT.isInteger() &&
DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() &&
"Mask must be zero/all-bits");
if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT)
return SDValue();
if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT))
return SDValue();
auto IsNegV = [](SDNode *N, SDValue V) {
return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
};
SDValue V;
if (IsNegV(Y.getNode(), X))
V = X;
else if (IsNegV(X.getNode(), Y))
V = Y;
else
return SDValue();
SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
SDValue SubOp2 = Mask;
// If the negate was on the false side of the select, then
// the operands of the SUB need to be swapped. PR 27251.
// This is because the pattern being matched above is
// (vselect M, (sub 0, X), X) -> (sub (xor X, M), M)
// but if the pattern matched was
// (vselect M, X, (sub 0, X)), that is really the negation of the pattern
// above, -(vselect M, (sub 0, X), X), and therefore the replacement
// pattern also needs to be a negation of the replacement pattern above.
// And -(sub X, Y) is just (sub Y, X), so swapping the operands of the
// sub accomplishes the negation of the replacement pattern.
if (V == Y)
std::swap(SubOp1, SubOp2);
SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
return DAG.getBitcast(VT, Res);
}
/// Do target-specific dag combines on SELECT and VSELECT nodes.
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
// Try simplification again because we use this function to optimize
// BLENDV nodes that are not handled by the generic combiner.
if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS))
return V;
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool CondConstantVector = ISD::isBuildVectorOfConstantSDNodes(Cond.getNode());
// Attempt to combine (select M, (sub 0, X), X) -> (sub (xor X, M), M).
// Limit this to cases of non-constant masks that createShuffleMaskFromVSELECT
// can't catch, plus vXi8 cases where we'd likely end up with BLENDV.
if (CondVT.isVector() && CondVT.isInteger() &&
CondVT.getScalarSizeInBits() == VT.getScalarSizeInBits() &&
(!CondConstantVector || CondVT.getScalarType() == MVT::i8) &&
DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits())
if (SDValue V = combineLogicBlendIntoConditionalNegate(VT, Cond, RHS, LHS,
DL, DAG, Subtarget))
return V;
// Convert vselects with constant condition into shuffles.
if (CondConstantVector && DCI.isBeforeLegalizeOps() &&
(N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV)) {
SmallVector<int, 64> Mask;
if (createShuffleMaskFromVSELECT(Mask, Cond,
N->getOpcode() == X86ISD::BLENDV))
return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
}
// fold vselect(cond, pshufb(x), pshufb(y)) -> or (pshufb(x), pshufb(y))
// by forcing the unselected elements to zero.
// TODO: Can we handle more shuffles with this?
if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() &&
LHS.getOpcode() == X86ISD::PSHUFB && RHS.getOpcode() == X86ISD::PSHUFB &&
LHS.hasOneUse() && RHS.hasOneUse()) {
MVT SimpleVT = VT.getSimpleVT();
SmallVector<SDValue, 1> LHSOps, RHSOps;
SmallVector<int, 64> LHSMask, RHSMask, CondMask;
if (createShuffleMaskFromVSELECT(CondMask, Cond) &&
getTargetShuffleMask(LHS.getNode(), SimpleVT, true, LHSOps, LHSMask) &&
getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask)) {
int NumElts = VT.getVectorNumElements();
for (int i = 0; i != NumElts; ++i) {
// getConstVector sets negative shuffle mask values as undef, so ensure
// we hardcode SM_SentinelZero values to zero (0x80).
if (CondMask[i] < NumElts) {
LHSMask[i] = isUndefOrZero(LHSMask[i]) ? 0x80 : LHSMask[i];
RHSMask[i] = 0x80;
} else {
LHSMask[i] = 0x80;
RHSMask[i] = isUndefOrZero(RHSMask[i]) ? 0x80 : RHSMask[i];
}
}
LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0),
getConstVector(LHSMask, SimpleVT, DAG, DL, true));
RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0),
getConstVector(RHSMask, SimpleVT, DAG, DL, true));
return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
}
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
// We also try to create v2f32 min/max nodes, which we later widen to v4f32.
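// e.g. (select (setcc X, Y, setolt), X, Y) --> (X86ISD::FMIN X, Y); the CC
// cases below bail out or swap operands whenever MIN/MAX NaN or signed-zero
// behaviour could differ from the original select.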
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && VT != MVT::f128 && !isSoftFP16(VT, Subtarget) &&
(TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget.hasSSE2() ||
(Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
[[fallthrough]];
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
[[fallthrough]];
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
[[fallthrough]];
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETOLE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!DAG.isKnownNeverZeroFloat(LHS) &&
!DAG.isKnownNeverZeroFloat(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
[[fallthrough]];
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
}
}
if (Opcode)
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
// Some mask scalar intrinsics rely on checking if only one bit is set
// and implement it in C code like this:
// A[0] = (U & 1) ? A[0] : W[0];
// This creates some redundant instructions that break pattern matching.
// fold (select (setcc (and X, 1), 0, seteq), Y, Z) -> (select (and X, 1), Z, Y)
if (Subtarget.hasAVX512() && N->getOpcode() == ISD::SELECT &&
Cond.getOpcode() == ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
SDValue AndNode = Cond.getOperand(0);
if (AndNode.getOpcode() == ISD::AND && CC == ISD::SETEQ &&
isNullConstant(Cond.getOperand(1)) &&
isOneConstant(AndNode.getOperand(1))) {
// LHS and RHS swapped due to
// setcc outputting 1 when AND resulted in 0 and vice versa.
AndNode = DAG.getZExtOrTrunc(AndNode, DL, MVT::i8);
return DAG.getNode(ISD::SELECT, DL, VT, AndNode, RHS, LHS);
}
}
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on KNL. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use an AVX instruction.
// The same situation applies to all vectors of i8 and i16 without BWI.
// Make sure we extend these even before type legalization gets a chance to
// split wide vectors.
// Since SKX, these selects have a proper lowering.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
}
// AVX512 - Extend select with zero to merge with target shuffle.
// select(mask, extract_subvector(shuffle(x)), zero) -->
// extract_subvector(select(insert_subvector(mask), shuffle(x), zero))
// TODO - support non target shuffles as well.
if (Subtarget.hasAVX512() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1) {
auto SelectableOp = [&TLI](SDValue Op) {
return Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
isTargetShuffle(Op.getOperand(0).getOpcode()) &&
isNullConstant(Op.getOperand(1)) &&
TLI.isTypeLegal(Op.getOperand(0).getValueType()) &&
Op.hasOneUse() && Op.getOperand(0).hasOneUse();
};
bool SelectableLHS = SelectableOp(LHS);
bool SelectableRHS = SelectableOp(RHS);
bool ZeroLHS = ISD::isBuildVectorAllZeros(LHS.getNode());
bool ZeroRHS = ISD::isBuildVectorAllZeros(RHS.getNode());
if ((SelectableLHS && ZeroRHS) || (SelectableRHS && ZeroLHS)) {
EVT SrcVT = SelectableLHS ? LHS.getOperand(0).getValueType()
: RHS.getOperand(0).getValueType();
EVT SrcCondVT = SrcVT.changeVectorElementType(MVT::i1);
LHS = insertSubVector(DAG.getUNDEF(SrcVT), LHS, 0, DAG, DL,
VT.getSizeInBits());
RHS = insertSubVector(DAG.getUNDEF(SrcVT), RHS, 0, DAG, DL,
VT.getSizeInBits());
Cond = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcCondVT,
DAG.getUNDEF(SrcCondVT), Cond,
DAG.getIntPtrConstant(0, DL));
SDValue Res = DAG.getSelect(DL, SrcVT, Cond, LHS, RHS);
return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
}
}
if (SDValue V = combineSelectOfTwoConstants(N, DAG))
return V;
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
Cond.hasOneUse()) {
EVT CondVT = Cond.getValueType();
SDValue Cond0 = Cond.getOperand(0);
SDValue Cond1 = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Canonicalize min/max:
// (x > 0) ? x : 0 -> (x >= 0) ? x : 0
// (x < -1) ? x : -1 -> (x <= -1) ? x : -1
// This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
// the need for an extra compare against zero. e.g.
// (a - b) > 0 ? (a - b) : 0 -> (a - b) >= 0 ? (a - b) : 0
// subl %esi, %edi
// testl %edi, %edi
// movl $0, %eax
// cmovgl %edi, %eax
// =>
// xorl %eax, %eax
// subl %esi, %edi
// cmovsl %eax, %edi
//
// We can also canonicalize
// (x s> 1) ? x : 1 -> (x s>= 1) ? x : 1 -> (x s> 0) ? x : 1
// (x u> 1) ? x : 1 -> (x u>= 1) ? x : 1 -> (x != 0) ? x : 1
// This allows the use of a test instruction for the compare.
if (LHS == Cond0 && RHS == Cond1) {
if ((CC == ISD::SETGT && (isNullConstant(RHS) || isOneConstant(RHS))) ||
(CC == ISD::SETLT && isAllOnesConstant(RHS))) {
ISD::CondCode NewCC = CC == ISD::SETGT ? ISD::SETGE : ISD::SETLE;
Cond = DAG.getSetCC(SDLoc(Cond), CondVT, Cond0, Cond1, NewCC);
return DAG.getSelect(DL, VT, Cond, LHS, RHS);
}
if (CC == ISD::SETUGT && isOneConstant(RHS)) {
ISD::CondCode NewCC = ISD::SETUGE;
Cond = DAG.getSetCC(SDLoc(Cond), CondVT, Cond0, Cond1, NewCC);
return DAG.getSelect(DL, VT, Cond, LHS, RHS);
}
}
// Similar to DAGCombine's select(or(CC0,CC1),X,Y) fold but for legal types.
// fold eq + gt/lt nested selects into ge/le selects
// select (cmpeq Cond0, Cond1), LHS, (select (cmpugt Cond0, Cond1), LHS, Y)
// --> (select (cmpuge Cond0, Cond1), LHS, Y)
// select (cmpslt Cond0, Cond1), LHS, (select (cmpeq Cond0, Cond1), LHS, Y)
// --> (select (cmpsle Cond0, Cond1), LHS, Y)
// .. etc ..
if (RHS.getOpcode() == ISD::SELECT && RHS.getOperand(1) == LHS &&
RHS.getOperand(0).getOpcode() == ISD::SETCC) {
SDValue InnerSetCC = RHS.getOperand(0);
ISD::CondCode InnerCC =
cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get();
if ((CC == ISD::SETEQ || InnerCC == ISD::SETEQ) &&
Cond0 == InnerSetCC.getOperand(0) &&
Cond1 == InnerSetCC.getOperand(1)) {
ISD::CondCode NewCC;
switch (CC == ISD::SETEQ ? InnerCC : CC) {
case ISD::SETGT: NewCC = ISD::SETGE; break;
case ISD::SETLT: NewCC = ISD::SETLE; break;
case ISD::SETUGT: NewCC = ISD::SETUGE; break;
case ISD::SETULT: NewCC = ISD::SETULE; break;
default: NewCC = ISD::SETCC_INVALID; break;
}
if (NewCC != ISD::SETCC_INVALID) {
Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC);
return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2));
}
}
}
}
// Check if the first operand is all zeros and Cond type is vXi1.
// If this an avx512 target we can improve the use of zero masking by
// swapping the operands and inverting the condition.
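// e.g. vselect Cond, 0, X --> vselect ~Cond, X, 0, where the zero false
// operand can then be matched by AVX512 zero-masking.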
if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorAllZeros(LHS.getNode()) &&
!ISD::isBuildVectorAllZeros(RHS.getNode())) {
// Invert the condition: not(cond) = xor(cond, allones)
SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
// vselect cond, op1, op2 == vselect not(cond), op2, op1
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
}
// Attempt to convert a (vXi1 bitcast(iX Cond)) selection mask before it might
// get split by legalization.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST &&
CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() &&
TLI.isTypeLegal(VT.getScalarType())) {
EVT ExtCondVT = VT.changeVectorElementTypeToInteger();
if (SDValue ExtCond = combineToExtendBoolVectorInReg(
ISD::SIGN_EXTEND, DL, ExtCondVT, Cond, DAG, DCI, Subtarget)) {
ExtCond = DAG.getNode(ISD::TRUNCATE, DL, CondVT, ExtCond);
return DAG.getSelect(DL, VT, ExtCond, LHS, RHS);
}
}
// Early exit: the remaining combines require a legal type that is not
// soft-promoted fp16.
if (!TLI.isTypeLegal(VT) || isSoftFP16(VT, Subtarget))
return SDValue();
if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
return V;
if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget))
return V;
if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
return V;
// select(~Cond, X, Y) -> select(Cond, Y, X)
if (CondVT.getScalarType() != MVT::i1) {
if (SDValue CondNot = IsNOT(Cond, DAG))
return DAG.getNode(N->getOpcode(), DL, VT,
DAG.getBitcast(CondVT, CondNot), RHS, LHS);
if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse()) {
// pcmpgt(X, -1) -> pcmpgt(0, X) so that select/blendv can just use the
// sign bit.
if (ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
}
// smin(LHS, RHS) : select(pcmpgt(RHS, LHS), LHS, RHS)
// -> select(pcmpgt(LHS, RHS), RHS, LHS)
// iff the commuted pcmpgt() already exists.
// TODO: Could DAGCombiner::combine cse search for SETCC nodes, like it
// does for commutative binops?
if (Cond.getOperand(0) == RHS && Cond.getOperand(1) == LHS) {
if (SDNode *FlipCond =
DAG.getNodeIfExists(X86ISD::PCMPGT, DAG.getVTList(CondVT),
{Cond.getOperand(1), Cond.getOperand(0)})) {
return DAG.getNode(N->getOpcode(), DL, VT, SDValue(FlipCond, 0), RHS,
LHS);
}
}
}
}
// Try to optimize vXi1 selects if both operands are either all constants or
// bitcasts from scalar integer type. In that case we can convert the operands
// to integer and use an integer select which will be converted to a CMOV.
// We need to take a little bit of care to avoid creating an i64 type after
// type legalization.
if (N->getOpcode() == ISD::SELECT && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
(DCI.isBeforeLegalize() || (VT != MVT::v64i1 || Subtarget.is64Bit()))) {
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
bool LHSIsConst = ISD::isBuildVectorOfConstantSDNodes(LHS.getNode());
bool RHSIsConst = ISD::isBuildVectorOfConstantSDNodes(RHS.getNode());
if ((LHSIsConst ||
(LHS.getOpcode() == ISD::BITCAST &&
LHS.getOperand(0).getValueType() == IntVT)) &&
(RHSIsConst ||
(RHS.getOpcode() == ISD::BITCAST &&
RHS.getOperand(0).getValueType() == IntVT))) {
if (LHSIsConst)
LHS = combinevXi1ConstantToInteger(LHS, DAG);
else
LHS = LHS.getOperand(0);
if (RHSIsConst)
RHS = combinevXi1ConstantToInteger(RHS, DAG);
else
RHS = RHS.getOperand(0);
SDValue Select = DAG.getSelect(DL, IntVT, Cond, LHS, RHS);
return DAG.getBitcast(VT, Select);
}
}
// If this is "((X & C) == 0) ? Y : Z" and C is a constant mask vector of
// single bits, then invert the predicate and swap the select operands.
// This can lower using a vector shift bit-hack rather than mask and compare.
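// e.g. for v2i64: vselect ((X & 2) == 0), L, R --> vselect ((X << 62) < 0),
// R, L - the lone mask bit is shifted up to the sign bit, which is all the
// blend reads.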
if (DCI.isBeforeLegalize() && !Subtarget.hasAVX512() &&
N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1 &&
Cond.getOperand(0).getOpcode() == ISD::AND &&
isNullOrNullSplat(Cond.getOperand(1)) &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
Cond.getOperand(0).getValueType() == VT) {
// The 'and' mask must be composed of power-of-2 constants.
SDValue And = Cond.getOperand(0);
auto *C = isConstOrConstSplat(And.getOperand(1));
if (C && C->getAPIntValue().isPowerOf2()) {
// vselect (X & C == 0), LHS, RHS --> vselect (X & C != 0), RHS, LHS
SDValue NotCond =
DAG.getSetCC(DL, CondVT, And, Cond.getOperand(1), ISD::SETNE);
return DAG.getSelect(DL, VT, NotCond, RHS, LHS);
}
// If we have a non-splat but still powers-of-2 mask, AVX1 can use pmulld
// and AVX2 can use vpsllv{dq}. 8-bit lacks a proper shift or multiply.
// 16-bit lacks a proper blendv.
unsigned EltBitWidth = VT.getScalarSizeInBits();
bool CanShiftBlend =
TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) ||
(Subtarget.hasAVX2() && EltBitWidth == 64) ||
(Subtarget.hasXOP()));
if (CanShiftBlend &&
ISD::matchUnaryPredicate(And.getOperand(1), [](ConstantSDNode *C) {
return C->getAPIntValue().isPowerOf2();
})) {
// Create a left-shift constant to get the mask bits over to the sign-bit.
SDValue Mask = And.getOperand(1);
SmallVector<int, 32> ShlVals;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
auto *MaskVal = cast<ConstantSDNode>(Mask.getOperand(i));
ShlVals.push_back(EltBitWidth - 1 -
MaskVal->getAPIntValue().exactLogBase2());
}
// vsel ((X & C) == 0), LHS, RHS --> vsel ((shl X, C') < 0), RHS, LHS
SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt);
SDValue NewCond =
DAG.getSetCC(DL, CondVT, Shl, Cond.getOperand(1), ISD::SETLT);
return DAG.getSelect(DL, VT, NewCond, RHS, LHS);
}
}
return SDValue();
}
/// Combine:
/// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
/// to:
/// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
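/// e.g. (atomic_load_add x, 1) returns the old value, so COND_S (old < 0)
/// becomes COND_LE (old + 1 <= 0) evaluated on the flags of the locked add.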
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
return SDValue();
// Can't replace the cmp if it has more uses than the one we're looking at.
// FIXME: We would like to be able to handle this, but would need to make sure
// all uses were updated.
if (!Cmp.hasOneUse())
return SDValue();
// This only applies to variations of the common case:
// (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
// (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
// (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
// (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
// With the proper condition codes (see below), overflow is accounted for.
// FIXME: We can generalize both constraints:
// - XOR/OR/AND (if they were made to survive AtomicExpand)
// - LHS != 1
// if the result is compared.
SDValue CmpLHS = Cmp.getOperand(0);
SDValue CmpRHS = Cmp.getOperand(1);
EVT CmpVT = CmpLHS.getValueType();
if (!CmpLHS.hasOneUse())
return SDValue();
unsigned Opc = CmpLHS.getOpcode();
if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
return SDValue();
SDValue OpRHS = CmpLHS.getOperand(2);
auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
if (!OpRHSC)
return SDValue();
APInt Addend = OpRHSC->getAPIntValue();
if (Opc == ISD::ATOMIC_LOAD_SUB)
Addend = -Addend;
auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
if (!CmpRHSC)
return SDValue();
APInt Comparison = CmpRHSC->getAPIntValue();
APInt NegAddend = -Addend;
// See if we can adjust the CC to make the comparison match the negated
// addend.
if (Comparison != NegAddend) {
APInt IncComparison = Comparison + 1;
if (IncComparison == NegAddend) {
if (CC == X86::COND_A && !Comparison.isMaxValue()) {
Comparison = IncComparison;
CC = X86::COND_AE;
} else if (CC == X86::COND_LE && !Comparison.isMaxSignedValue()) {
Comparison = IncComparison;
CC = X86::COND_L;
}
}
APInt DecComparison = Comparison - 1;
if (DecComparison == NegAddend) {
if (CC == X86::COND_AE && !Comparison.isMinValue()) {
Comparison = DecComparison;
CC = X86::COND_A;
} else if (CC == X86::COND_L && !Comparison.isMinSignedValue()) {
Comparison = DecComparison;
CC = X86::COND_LE;
}
}
}
// If the addend is the negation of the comparison value, then we can do
// a full comparison by emitting the atomic arithmetic as a locked sub.
if (Comparison == NegAddend) {
// The CC is fine, but we need to rewrite the LHS of the comparison as an
// atomic sub.
auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
auto AtomicSub = DAG.getAtomic(
ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpVT,
/*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(NegAddend, SDLoc(CmpRHS), CmpVT),
AN->getMemOperand());
auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpVT));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
return LockOp;
}
// We can handle comparisons with zero in a number of cases by manipulating
// the CC used.
if (!Comparison.isZero())
return SDValue();
if (CC == X86::COND_S && Addend == 1)
CC = X86::COND_LE;
else if (CC == X86::COND_NS && Addend == 1)
CC = X86::COND_G;
else if (CC == X86::COND_G && Addend == -1)
CC = X86::COND_GE;
else if (CC == X86::COND_LE && Addend == -1)
CC = X86::COND_L;
else
return SDValue();
SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpVT));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
return LockOp;
}
// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
//
// Simplify the following patterns:
// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
// to (Op EFLAGS Cond)
//
// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
// to (Op EFLAGS !Cond)
//
// where Op could be BRCOND or CMOV.
//
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
return SDValue();
// Quit if not used as a boolean value.
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
// Check CMP operands. One of them should be 0 or 1 and the other should be
// a SETCC or a value extended from it.
SDValue Op1 = Cmp.getOperand(0);
SDValue Op2 = Cmp.getOperand(1);
SDValue SetCC;
const ConstantSDNode* C = nullptr;
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false; // Is it a comparison against 1?
if ((C = dyn_cast<ConstantSDNode>(Op1)))
SetCC = Op2;
else if ((C = dyn_cast<ConstantSDNode>(Op2)))
SetCC = Op1;
else // Quit if neither operand is a constant.
return SDValue();
if (C->getZExtValue() == 1) {
needOppositeCond = !needOppositeCond;
checkAgainstTrue = true;
} else if (C->getZExtValue() != 0)
// Quit if the constant is neither 0 nor 1.
return SDValue();
bool truncatedToBoolWithAnd = false;
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
SetCC.getOpcode() == ISD::TRUNCATE ||
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
if (isOneConstant(SetCC.getOperand(0)))
OpIdx = 1;
if (isOneConstant(SetCC.getOperand(1)))
OpIdx = 0;
if (OpIdx < 0)
break;
SetCC = SetCC.getOperand(OpIdx);
truncatedToBoolWithAnd = true;
} else
SetCC = SetCC.getOperand(0);
}
switch (SetCC.getOpcode()) {
case X86ISD::SETCC_CARRY:
// Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
// simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
// i.e. it's a comparison against true but the result of SETCC_CARRY is not
// truncated to i1 using 'and'.
if (checkAgainstTrue && !truncatedToBoolWithAnd)
break;
assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
"Invalid use of SETCC_CARRY!");
[[fallthrough]];
case X86ISD::SETCC:
// Set the condition code or opposite one if necessary.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(1);
case X86ISD::CMOV: {
// Check whether the false/true values are canonical, i.e. 0 or 1.
ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
// Quit if true value is not a constant.
if (!TVal)
return SDValue();
// Quit if false value is not a constant.
if (!FVal) {
SDValue Op = SetCC.getOperand(0);
// Skip 'zext' or 'trunc' node.
if (Op.getOpcode() == ISD::ZERO_EXTEND ||
Op.getOpcode() == ISD::TRUNCATE)
Op = Op.getOperand(0);
// A special case for rdrand/rdseed, where 0 is returned when the false
// condition is found.
if ((Op.getOpcode() != X86ISD::RDRAND &&
Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
bool FValIsFalse = true;
if (FVal && FVal->getZExtValue() != 0) {
if (FVal->getZExtValue() != 1)
return SDValue();
// If FVal is 1, opposite cond is needed.
needOppositeCond = !needOppositeCond;
FValIsFalse = false;
}
// Quit if TVal is not the constant opposite of FVal.
if (FValIsFalse && TVal->getZExtValue() != 1)
return SDValue();
if (!FValIsFalse && TVal->getZExtValue() != 0)
return SDValue();
CC = X86::CondCode(SetCC.getConstantOperandVal(2));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(3);
}
}
return SDValue();
}
/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
/// Match:
/// (X86or (X86setcc) (X86setcc))
/// (X86cmp (and (X86setcc) (X86setcc)), 0)
static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
X86::CondCode &CC1, SDValue &Flags,
bool &isAnd) {
if (Cond->getOpcode() == X86ISD::CMP) {
if (!isNullConstant(Cond->getOperand(1)))
return false;
Cond = Cond->getOperand(0);
}
isAnd = false;
SDValue SetCC0, SetCC1;
switch (Cond->getOpcode()) {
default: return false;
case ISD::AND:
case X86ISD::AND:
isAnd = true;
[[fallthrough]];
case ISD::OR:
case X86ISD::OR:
SetCC0 = Cond->getOperand(0);
SetCC1 = Cond->getOperand(1);
break;
};
// Make sure we have SETCC nodes, using the same flags value.
if (SetCC0.getOpcode() != X86ISD::SETCC ||
SetCC1.getOpcode() != X86ISD::SETCC ||
SetCC0->getOperand(1) != SetCC1->getOperand(1))
return false;
CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);
CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);
Flags = SetCC0->getOperand(1);
return true;
}
// When legalizing a carry, we create carries via "add X, -1".
// If that comes from an actual carry, via a setcc, we use the
// carry directly.
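// For a boolean X in {0, 1}, (add X, -1) sets CF exactly when X == 1, so
// the carry flag of the ADD reproduces the setcc result.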
static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
if (EFLAGS.getOpcode() == X86ISD::ADD) {
if (isAllOnesConstant(EFLAGS.getOperand(1))) {
bool FoundAndLSB = false;
SDValue Carry = EFLAGS.getOperand(0);
while (Carry.getOpcode() == ISD::TRUNCATE ||
Carry.getOpcode() == ISD::ZERO_EXTEND ||
(Carry.getOpcode() == ISD::AND &&
isOneConstant(Carry.getOperand(1)))) {
FoundAndLSB |= Carry.getOpcode() == ISD::AND;
Carry = Carry.getOperand(0);
}
if (Carry.getOpcode() == X86ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC_CARRY) {
// TODO: Merge this code with equivalent in combineAddOrSubToADCOrSBB?
uint64_t CarryCC = Carry.getConstantOperandVal(0);
SDValue CarryOp1 = Carry.getOperand(1);
if (CarryCC == X86::COND_B)
return CarryOp1;
if (CarryCC == X86::COND_A) {
// Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because the CMP
// instruction cannot take an immediate as its first operand.
//
if (CarryOp1.getOpcode() == X86ISD::SUB &&
CarryOp1.getNode()->hasOneUse() &&
CarryOp1.getValueType().isInteger() &&
!isa<ConstantSDNode>(CarryOp1.getOperand(1))) {
SDValue SubCommute =
DAG.getNode(X86ISD::SUB, SDLoc(CarryOp1), CarryOp1->getVTList(),
CarryOp1.getOperand(1), CarryOp1.getOperand(0));
return SDValue(SubCommute.getNode(), CarryOp1.getResNo());
}
}
// If this is a check of the z flag of an add with 1, switch to the
// C flag.
if (CarryCC == X86::COND_E &&
CarryOp1.getOpcode() == X86ISD::ADD &&
isOneConstant(CarryOp1.getOperand(1)))
return CarryOp1;
} else if (FoundAndLSB) {
SDLoc DL(Carry);
SDValue BitNo = DAG.getConstant(0, DL, Carry.getValueType());
if (Carry.getOpcode() == ISD::SRL) {
BitNo = Carry.getOperand(1);
Carry = Carry.getOperand(0);
}
return getBT(Carry, BitNo, DL, DAG);
}
}
}
return SDValue();
}
/// If we are inverting an PTEST/TESTP operand, attempt to adjust the CC
/// to avoid the inversion.
static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// TODO: Handle X86ISD::KTEST/X86ISD::KORTEST.
if (EFLAGS.getOpcode() != X86ISD::PTEST &&
EFLAGS.getOpcode() != X86ISD::TESTP)
return SDValue();
// PTEST/TESTP sets EFLAGS as:
// TESTZ: ZF = (Op0 & Op1) == 0
// TESTC: CF = (~Op0 & Op1) == 0
// TESTNZC: ZF == 0 && CF == 0
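// e.g. TESTZ(X,X) sets ZF iff X is all-zero, and TESTC(X,-1) sets CF iff X
// is all-ones.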
EVT VT = EFLAGS.getValueType();
SDValue Op0 = EFLAGS.getOperand(0);
SDValue Op1 = EFLAGS.getOperand(1);
EVT OpVT = Op0.getValueType();
// TEST*(~X,Y) can be handled as TEST*(X,Y) if we adjust the condition code.
if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
X86::CondCode InvCC;
switch (CC) {
case X86::COND_B:
// testc -> testz.
InvCC = X86::COND_E;
break;
case X86::COND_AE:
// !testc -> !testz.
InvCC = X86::COND_NE;
break;
case X86::COND_E:
// testz -> testc.
InvCC = X86::COND_B;
break;
case X86::COND_NE:
// !testz -> !testc.
InvCC = X86::COND_AE;
break;
case X86::COND_A:
case X86::COND_BE:
// testnzc -> testnzc (no change).
InvCC = CC;
break;
default:
InvCC = X86::COND_INVALID;
break;
}
if (InvCC != X86::COND_INVALID) {
CC = InvCC;
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
DAG.getBitcast(OpVT, NotOp0), Op1);
}
}
if (CC == X86::COND_E || CC == X86::COND_NE) {
// TESTZ(X,~Y) == TESTC(Y,X)
if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
DAG.getBitcast(OpVT, NotOp1), Op0);
}
if (Op0 == Op1) {
SDValue BC = peekThroughBitcasts(Op0);
EVT BCVT = BC.getValueType();
assert(BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
"Unexpected vector type");
// TESTZ(AND(X,Y),AND(X,Y)) == TESTZ(X,Y)
if (BC.getOpcode() == ISD::AND || BC.getOpcode() == X86ISD::FAND) {
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
DAG.getBitcast(OpVT, BC.getOperand(0)),
DAG.getBitcast(OpVT, BC.getOperand(1)));
}
// TESTZ(AND(~X,Y),AND(~X,Y)) == TESTC(X,Y)
if (BC.getOpcode() == X86ISD::ANDNP || BC.getOpcode() == X86ISD::FANDN) {
CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
DAG.getBitcast(OpVT, BC.getOperand(0)),
DAG.getBitcast(OpVT, BC.getOperand(1)));
}
// If every element is an all-sign value, see if we can use MOVMSK to
// more efficiently extract the sign bits and compare that.
// TODO: Handle TESTC with comparison inversion.
// TODO: Can we remove SimplifyMultipleUseDemandedBits and rely on
// MOVMSK combines to make sure its never worse than PTEST?
unsigned EltBits = BCVT.getScalarSizeInBits();
if (DAG.ComputeNumSignBits(BC) == EltBits) {
assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
APInt SignMask = APInt::getSignMask(EltBits);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Res =
TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
// For vXi16 cases we need to use PMOVMSKB and extract every other
// sign bit.
SDLoc DL(EFLAGS);
if (EltBits == 16) {
MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
Res = DAG.getBitcast(MovmskVT, Res);
Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
} else {
Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
}
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
DAG.getConstant(0, DL, MVT::i32));
}
}
}
// TESTZ(-1,X) == TESTZ(X,X)
if (ISD::isBuildVectorAllOnes(Op0.getNode()))
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1);
// TESTZ(X,-1) == TESTZ(X,X)
if (ISD::isBuildVectorAllOnes(Op1.getNode()))
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
// TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
// TODO: Add COND_NE handling?
if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX()) {
SDValue Src0 = peekThroughBitcasts(Op0);
SDValue Src1 = peekThroughBitcasts(Op1);
if (Src0.getOpcode() == ISD::OR && Src1.getOpcode() == ISD::OR) {
Src0 = getSplitVectorSrc(peekThroughBitcasts(Src0.getOperand(0)),
peekThroughBitcasts(Src0.getOperand(1)), true);
Src1 = getSplitVectorSrc(peekThroughBitcasts(Src1.getOperand(0)),
peekThroughBitcasts(Src1.getOperand(1)), true);
if (Src0 && Src1)
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
DAG.getBitcast(MVT::v4i64, Src0),
DAG.getBitcast(MVT::v4i64, Src1));
}
}
}
return SDValue();
}
// Attempt to simplify the MOVMSK input based on the comparison type.
static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Handle eq/ne against zero (any_of).
// Handle eq/ne against -1 (all_of).
if (!(CC == X86::COND_E || CC == X86::COND_NE))
return SDValue();
if (EFLAGS.getValueType() != MVT::i32)
return SDValue();
unsigned CmpOpcode = EFLAGS.getOpcode();
if (CmpOpcode != X86ISD::CMP && CmpOpcode != X86ISD::SUB)
return SDValue();
auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1));
if (!CmpConstant)
return SDValue();
const APInt &CmpVal = CmpConstant->getAPIntValue();
SDValue CmpOp = EFLAGS.getOperand(0);
unsigned CmpBits = CmpOp.getValueSizeInBits();
assert(CmpBits == CmpVal.getBitWidth() && "Value size mismatch");
// Peek through any truncate.
if (CmpOp.getOpcode() == ISD::TRUNCATE)
CmpOp = CmpOp.getOperand(0);
// Bail if we don't find a MOVMSK.
if (CmpOp.getOpcode() != X86ISD::MOVMSK)
return SDValue();
SDValue Vec = CmpOp.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
assert((VecVT.is128BitVector() || VecVT.is256BitVector()) &&
"Unexpected MOVMSK operand");
unsigned NumElts = VecVT.getVectorNumElements();
unsigned NumEltBits = VecVT.getScalarSizeInBits();
bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isZero();
bool IsAllOf = (CmpOpcode == X86ISD::SUB || CmpOpcode == X86ISD::CMP) &&
NumElts <= CmpBits && CmpVal.isMask(NumElts);
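// IsAnyOf: eq/ne comparison of the MOVMSK result against 0 ("is any sign
// bit set?"). IsAllOf: eq/ne comparison against the low-NumElts mask ("are
// all sign bits set?").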
if (!IsAnyOf && !IsAllOf)
return SDValue();
// TODO: Check more combining cases.
// We use the number of CMP uses to decide whether to combine or not.
// Currently only the "MOVMSK(CONCAT(..))" and "MOVMSK(PCMPEQ(..))"
// combines below are restricted by this one-use constraint.
bool IsOneUse = CmpOp.getNode()->hasOneUse();
// See if we can peek through to a vector with a wider element type, if the
// signbits extend down to all the sub-elements as well.
// Calling MOVMSK with the wider type, avoiding the bitcast, helps expose
// potential SimplifyDemandedBits/Elts cases.
// If we looked through a truncate that discards bits, we can't do this
// transform.
// FIXME: We could do this transform for truncates that discarded bits by
// inserting an AND mask between the new MOVMSK and the CMP.
if (Vec.getOpcode() == ISD::BITCAST && NumElts <= CmpBits) {
SDValue BC = peekThroughBitcasts(Vec);
MVT BCVT = BC.getSimpleValueType();
unsigned BCNumElts = BCVT.getVectorNumElements();
unsigned BCNumEltBits = BCVT.getScalarSizeInBits();
if ((BCNumEltBits == 32 || BCNumEltBits == 64) &&
BCNumEltBits > NumEltBits &&
DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) {
SDLoc DL(EFLAGS);
APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : BCNumElts);
return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BC),
DAG.getConstant(CmpMask, DL, MVT::i32));
}
}
// MOVMSK(CONCAT(X,Y)) == 0 -> MOVMSK(OR(X,Y)).
// MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)).
// MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)).
// MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)).
if (VecVT.is256BitVector() && NumElts <= CmpBits && IsOneUse) {
SmallVector<SDValue> Ops;
if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops, DAG) &&
Ops.size() == 2) {
SDLoc DL(EFLAGS);
EVT SubVT = Ops[0].getValueType().changeTypeToInteger();
APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : NumElts / 2);
SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT,
DAG.getBitcast(SubVT, Ops[0]),
DAG.getBitcast(SubVT, Ops[1]));
V = DAG.getBitcast(VecVT.getHalfNumVectorElementsVT(), V);
return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V),
DAG.getConstant(CmpMask, DL, MVT::i32));
}
}
// MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).
// MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
// MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
// MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
if (IsAllOf && Subtarget.hasSSE41() && IsOneUse) {
MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
SDValue BC = peekThroughBitcasts(Vec);
// Ensure MOVMSK was testing every signbit of BC.
if (BC.getValueType().getVectorNumElements() <= NumElts) {
if (BC.getOpcode() == X86ISD::PCMPEQ) {
SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
BC.getOperand(0), BC.getOperand(1));
V = DAG.getBitcast(TestVT, V);
return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
}
// Check for 256-bit split vector cases.
if (BC.getOpcode() == ISD::AND &&
BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ &&
BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
SDValue LHS = BC.getOperand(0);
SDValue RHS = BC.getOperand(1);
LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
LHS.getOperand(0), LHS.getOperand(1));
RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
RHS.getOperand(0), RHS.getOperand(1));
LHS = DAG.getBitcast(TestVT, LHS);
RHS = DAG.getBitcast(TestVT, RHS);
SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
}
}
}
// See if we can avoid a PACKSS by calling MOVMSK on the sources.
// For vXi16 cases we can use a v2Xi8 PMOVMSKB. We must mask out
// sign bits prior to the comparison with zero unless we know that
// the vXi16 splats the sign bit down to the lower i8 half.
// TODO: Handle all_of patterns.
if (Vec.getOpcode() == X86ISD::PACKSS && VecVT == MVT::v16i8) {
SDValue VecOp0 = Vec.getOperand(0);
SDValue VecOp1 = Vec.getOperand(1);
bool SignExt0 = DAG.ComputeNumSignBits(VecOp0) > 8;
bool SignExt1 = DAG.ComputeNumSignBits(VecOp1) > 8;
// PMOVMSKB(PACKSSBW(X, undef)) -> PMOVMSKB(BITCAST_v16i8(X)) & 0xAAAA.
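// After the v16i8 bitcast, each original i16 element spans two bytes with
// its sign bit in the high byte, i.e. the odd bits of the PMOVMSKB result;
// hence the 0xAAAA mask when the low bytes aren't known sign-splats.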
if (IsAnyOf && CmpBits == 8 && VecOp1.isUndef()) {
SDLoc DL(EFLAGS);
SDValue Result = DAG.getBitcast(MVT::v16i8, VecOp0);
Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
Result = DAG.getZExtOrTrunc(Result, DL, MVT::i16);
if (!SignExt0) {
Result = DAG.getNode(ISD::AND, DL, MVT::i16, Result,
DAG.getConstant(0xAAAA, DL, MVT::i16));
}
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
DAG.getConstant(0, DL, MVT::i16));
}
// PMOVMSKB(PACKSSBW(LO(X), HI(X)))
// -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA.
if (CmpBits >= 16 && Subtarget.hasInt256() &&
(IsAnyOf || (SignExt0 && SignExt1))) {
if (SDValue Src = getSplitVectorSrc(VecOp0, VecOp1, true)) {
SDLoc DL(EFLAGS);
SDValue Result = peekThroughBitcasts(Src);
if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ &&
Result.getValueType().getVectorNumElements() <= NumElts) {
SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
Result.getOperand(0), Result.getOperand(1));
V = DAG.getBitcast(MVT::v4i64, V);
return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
}
Result = DAG.getBitcast(MVT::v32i8, Result);
Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
if (!SignExt0 || !SignExt1) {
assert(IsAnyOf &&
"Only perform v16i16 signmasks for any_of patterns");
Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
}
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
DAG.getConstant(CmpMask, DL, MVT::i32));
}
}
}
// MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X) iff every element is referenced.
SmallVector<int, 32> ShuffleMask;
SmallVector<SDValue, 2> ShuffleInputs;
if (NumElts <= CmpBits &&
getTargetShuffleInputs(peekThroughBitcasts(Vec), ShuffleInputs,
ShuffleMask, DAG) &&
ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) &&
ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits()) {
unsigned NumShuffleElts = ShuffleMask.size();
APInt DemandedElts = APInt::getZero(NumShuffleElts);
for (int M : ShuffleMask) {
assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index");
DemandedElts.setBit(M);
}
if (DemandedElts.isAllOnes()) {
SDLoc DL(EFLAGS);
SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
Result =
DAG.getZExtOrTrunc(Result, DL, EFLAGS.getOperand(0).getValueType());
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
EFLAGS.getOperand(1));
}
}
return SDValue();
}
/// Optimize an EFLAGS definition used according to the condition code \p CC
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
/// uses of chain values.
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (CC == X86::COND_B)
if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG))
return Flags;
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG, Subtarget))
return R;
if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
return R;
return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue FalseOp = N->getOperand(0);
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
// cmov X, X, ?, ? --> X
if (TrueOp == FalseOp)
return TrueOp;
// Try to simplify the EFLAGS and condition code operands.
// We can't always do this as FCMOV only supports a subset of X86 cond.
if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
if (!(FalseOp.getValueType() == MVT::f80 ||
(FalseOp.getValueType() == MVT::f64 && !Subtarget.hasSSE2()) ||
(FalseOp.getValueType() == MVT::f32 && !Subtarget.hasSSE1())) ||
!Subtarget.canUseCMOV() || hasFPCMov(CC)) {
SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8),
Flags};
return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
}
}
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
std::swap(TrueOp, FalseOp);
}
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
DAG.getConstant(ShAmt, DL, MVT::i8));
return Cond;
}
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() &&
"Implicit constant truncation");
bool isFastMultiplier = false;
if (Diff.ult(10)) {
switch (Diff.getZExtValue()) {
default: break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
Cond = getSETCC(CC, Cond, DL ,DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, DL, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
}
}
}
// Handle these cases:
// (select (x != c), e, c) -> (select (x != c), e, x),
// (select (x == c), c, e) -> (select (x == c), x, e)
// where c is an integer constant, and the "select" is the combination
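// e.g. (select (x == 5), 5, e) -> (select (x == 5), x, e): when the condition
// holds, x is known to equal 5, so the register x can replace the immediate.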
// of CMOV and CMP.
//
// The rationale for this change is that a conditional move from a constant
// needs two instructions, whereas a conditional move from a register needs
// only one instruction.
//
// CAVEAT: By replacing a constant with a symbolic value, this change may obscure
// some instruction-combining opportunities. This opt needs to be
// postponed as late as possible.
//
if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
// the DCI.xxxx conditions are provided to postpone the optimization as
// late as possible.
ConstantSDNode *CmpAgainst = nullptr;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
!isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueOp, FalseOp);
}
if (CC == X86::COND_E &&
CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
SDValue Ops[] = {FalseOp, Cond.getOperand(0),
DAG.getTargetConstant(CC, DL, MVT::i8), Cond};
return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
}
}
}
// Fold and/or of setcc's to double CMOV:
// (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
// (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
//
// This combine lets us generate:
// cmovcc1 (jcc1 if we don't have CMOV)
// cmovcc2 (same)
// instead of:
// setcc1
// setcc2
// and/or
// cmovne (jne if we don't have CMOV)
// When we can't use the CMOV instruction, it might increase branch
// mispredicts.
// When we can use CMOV, or when there is no mispredict, this improves
// throughput and reduces register pressure.
//
if (CC == X86::COND_NE) {
SDValue Flags;
X86::CondCode CC0, CC1;
bool isAndSetCC;
if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) {
if (isAndSetCC) {
std::swap(FalseOp, TrueOp);
CC0 = X86::GetOppositeBranchCondition(CC0);
CC1 = X86::GetOppositeBranchCondition(CC1);
}
SDValue LOps[] = {FalseOp, TrueOp,
DAG.getTargetConstant(CC0, DL, MVT::i8), Flags};
SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps);
SDValue Ops[] = {LCMOV, TrueOp, DAG.getTargetConstant(CC1, DL, MVT::i8),
Flags};
SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
return CMOV;
}
}
// Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) ->
// (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2)
// Or (CMOV (ADD (CTTZ X), C2), C1, (X == 0)) ->
// (ADD (CMOV (CTTZ X), C1-C2, (X == 0)), C2)
if ((CC == X86::COND_NE || CC == X86::COND_E) &&
Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) {
SDValue Add = TrueOp;
SDValue Const = FalseOp;
// Canonicalize the condition code for easier matching and output.
if (CC == X86::COND_E)
std::swap(Add, Const);
// We might have replaced the constant in the cmov with the LHS of the
// compare. If so change it to the RHS of the compare.
if (Const == Cond.getOperand(0))
Const = Cond.getOperand(1);
// Ok, now make sure that Add is (add (cttz X), C2) and Const is a constant.
if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) &&
(Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
Add.getOperand(0).getOpcode() == ISD::CTTZ) &&
Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) {
EVT VT = N->getValueType(0);
// This should constant fold.
SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
SDValue CMov =
DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8), Cond);
return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
}
}
return SDValue();
}
/// Different mul shrinking modes.
enum class ShrinkMode { MULS8, MULU8, MULS16, MULU16 };
static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
EVT VT = N->getOperand(0).getValueType();
if (VT.getScalarSizeInBits() != 32)
return false;
assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2");
unsigned SignBits[2] = {1, 1};
bool IsPositive[2] = {false, false};
for (unsigned i = 0; i < 2; i++) {
SDValue Opd = N->getOperand(i);
SignBits[i] = DAG.ComputeNumSignBits(Opd);
IsPositive[i] = DAG.SignBitIsZero(Opd);
}
bool AllPositive = IsPositive[0] && IsPositive[1];
unsigned MinSignBits = std::min(SignBits[0], SignBits[1]);
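// ComputeNumSignBits counts the bits known to match the sign bit (including
// the sign bit itself), so for a 32-bit element >= 25 sign bits means the
// value fits in a signed 8-bit range, and >= 17 means it fits in 16 bits.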
// When ranges are from -128 ~ 127, use MULS8 mode.
if (MinSignBits >= 25)
Mode = ShrinkMode::MULS8;
// When ranges are from 0 ~ 255, use MULU8 mode.
else if (AllPositive && MinSignBits >= 24)
Mode = ShrinkMode::MULU8;
// When ranges are from -32768 ~ 32767, use MULS16 mode.
else if (MinSignBits >= 17)
Mode = ShrinkMode::MULS16;
// When ranges are from 0 ~ 65535, use MULU16 mode.
else if (AllPositive && MinSignBits >= 16)
Mode = ShrinkMode::MULU16;
else
return false;
return true;
}
/// When the operands of vector mul are extended from smaller size values,
/// like i8 and i16, the type of mul may be shrunk to generate more
/// efficient code. Two typical patterns are handled:
/// Pattern1:
/// %2 = sext/zext <N x i8> %1 to <N x i32>
/// %4 = sext/zext <N x i8> %3 to <N x i32>
/// or %4 = build_vector <N x i32> %C1, ..., %CN (%C1..%CN are constants)
/// %5 = mul <N x i32> %2, %4
///
/// Pattern2:
/// %2 = zext/sext <N x i16> %1 to <N x i32>
/// %4 = zext/sext <N x i16> %3 to <N x i32>
/// or %4 = build_vector <N x i32> %C1, ..., %CN (%C1..%CN are constants)
/// %5 = mul <N x i32> %2, %4
///
/// There are four mul shrinking modes:
/// If %2 == sext32(trunc8(%2)), i.e., the scalar value range of %2 is
/// -128 to 127, and the scalar value range of %4 is also -128 to 127,
/// generate pmullw+sext32 for it (MULS8 mode).
/// If %2 == zext32(trunc8(%2)), i.e., the scalar value range of %2 is
/// 0 to 255, and the scalar value range of %4 is also 0 to 255,
/// generate pmullw+zext32 for it (MULU8 mode).
/// If %2 == sext32(trunc16(%2)), i.e., the scalar value range of %2 is
/// -32768 to 32767, and the scalar value range of %4 is also -32768 to 32767,
/// generate pmullw+pmulhw for it (MULS16 mode).
/// If %2 == zext32(trunc16(%2)), i.e., the scalar value range of %2 is
/// 0 to 65535, and the scalar value range of %4 is also 0 to 65535,
/// generate pmullw+pmulhuw for it (MULU16 mode).
static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Check for legality
// pmullw/pmulhw are not available before SSE2.
if (!Subtarget.hasSSE2())
return SDValue();
// Check for profitability
// pmulld is supported since SSE41. It is better to use pmulld
// instead of pmullw+pmulhw, except for subtargets where pmulld is slower than
// the expansion.
bool OptForMinSize = DAG.getMachineFunction().getFunction().hasMinSize();
if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
return SDValue();
ShrinkMode Mode;
if (!canReduceVMulWidth(N, DAG, Mode))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getOperand(0).getValueType();
unsigned NumElts = VT.getVectorNumElements();
if ((NumElts % 2) != 0)
return SDValue();
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
// Shrink the operands of mul.
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
// Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
// lower part is needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
if (Mode == ShrinkMode::MULU8 || Mode == ShrinkMode::MULS8)
return DAG.getNode((Mode == ShrinkMode::MULU8) ? ISD::ZERO_EXTEND
: ISD::SIGN_EXTEND,
DL, VT, MulLo);
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts / 2);
// Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
// the higher part is also needed.
SDValue MulHi =
DAG.getNode(Mode == ShrinkMode::MULS16 ? ISD::MULHS : ISD::MULHU, DL,
ReducedVT, NewN0, NewN1);
// Repack the lower part and higher part result of mul into a wider
// result.
// Generate shuffle functioning as punpcklwd.
SmallVector<int, 16> ShuffleMask(NumElts);
for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
ShuffleMask[2 * i] = i;
ShuffleMask[2 * i + 1] = i + NumElts;
}
SDValue ResLo =
DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
ResLo = DAG.getBitcast(ResVT, ResLo);
// Generate shuffle functioning as punpckhwd.
for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
ShuffleMask[2 * i] = i + NumElts / 2;
ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
}
SDValue ResHi =
DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
ResHi = DAG.getBitcast(ResVT, ResHi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
}
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
EVT VT, const SDLoc &DL) {
auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(Mult, DL, VT));
Result = DAG.getNode(ISD::SHL, DL, VT, Result,
DAG.getConstant(Shift, DL, MVT::i8));
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
N->getOperand(0));
return Result;
};
auto combineMulMulAddOrSub = [&](int Mul1, int Mul2, bool isAdd) {
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(Mul1, DL, VT));
Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, Result,
DAG.getConstant(Mul2, DL, VT));
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
N->getOperand(0));
return Result;
};
switch (MulAmt) {
default:
break;
case 11:
// mul x, 11 => add ((shl (mul x, 5), 1), x)
return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
case 21:
// mul x, 21 => add ((shl (mul x, 5), 2), x)
return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
case 41:
// mul x, 41 => add ((shl (mul x, 5), 3), x)
return combineMulShlAddOrSub(5, 3, /*isAdd*/ true);
case 22:
// mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
case 19:
// mul x, 19 => add ((shl (mul x, 9), 1), x)
return combineMulShlAddOrSub(9, 1, /*isAdd*/ true);
case 37:
// mul x, 37 => add ((shl (mul x, 9), 2), x)
return combineMulShlAddOrSub(9, 2, /*isAdd*/ true);
case 73:
// mul x, 73 => add ((shl (mul x, 9), 3), x)
return combineMulShlAddOrSub(9, 3, /*isAdd*/ true);
case 13:
// mul x, 13 => add ((shl (mul x, 3), 2), x)
return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
case 23:
// mul x, 23 => sub ((shl (mul x, 3), 3), x)
return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
case 26:
// mul x, 26 => add ((mul (mul x, 5), 5), x)
return combineMulMulAddOrSub(5, 5, /*isAdd*/ true);
case 28:
// mul x, 28 => add ((mul (mul x, 9), 3), x)
return combineMulMulAddOrSub(9, 3, /*isAdd*/ true);
case 29:
// mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulMulAddOrSub(9, 3, /*isAdd*/ true));
}
// Another trick. If this is a power of 2 plus 2/4/8, we can use a shift
// followed by a single LEA.
// First check that this is a sum of two powers of 2 because that's easy. Then
// count the trailing zeros to find the low bit.
// TODO: We can do this even without LEA at a cost of two shifts and an add.
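// MulAmt & (MulAmt - 1) clears the lowest set bit, so if the remainder is a
// power of 2 then MulAmt has exactly two set bits, i.e. it is 2^ShiftAmt +
// 2^ScaleShift. The low term uses a scale of 2/4/8 (ScaleShift 1..3) so the
// final add can be folded into an LEA.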
if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
unsigned ScaleShift = countTrailingZeros(MulAmt);
if (ScaleShift >= 1 && ScaleShift < 4) {
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(ShiftAmt, DL, MVT::i8));
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(ScaleShift, DL, MVT::i8));
return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
}
}
return SDValue();
}
// If the upper 17 bits of either operand are zero and the upper 17 bits of the
// other operand are all zero or sign bits, then we can use PMADDWD, which is
// always at least as quick as PMULLD, except on KNL.
static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2())
return SDValue();
if (Subtarget.isPMADDWDSlow())
return SDValue();
EVT VT = N->getValueType(0);
// Only support vXi32 vectors.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i32)
return SDValue();
// Make sure the type is legal or can split/widen to a legal type.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 1 || !isPowerOf2_32(NumElts))
return SDValue();
// With AVX512 but without BWI, we would need to split v32i16.
if (32 <= (2 * NumElts) && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// If we are zero/sign extending two steps without SSE4.1, it's better to
// reduce the vmul width instead.
if (!Subtarget.hasSSE41() &&
(((N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
(N1.getOpcode() == ISD::ZERO_EXTEND &&
N1.getOperand(0).getScalarValueSizeInBits() <= 8)) ||
((N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
(N1.getOpcode() == ISD::SIGN_EXTEND &&
N1.getOperand(0).getScalarValueSizeInBits() <= 8))))
return SDValue();
// If we are sign extending a wide vector without SSE4.1, it's better to reduce
// the vmul width instead.
if (!Subtarget.hasSSE41() &&
(N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getValueSizeInBits() > 128) &&
(N1.getOpcode() == ISD::SIGN_EXTEND &&
N1.getOperand(0).getValueSizeInBits() > 128))
return SDValue();
// Sign bits must extend down to the lowest i16.
if (DAG.ComputeMaxSignificantBits(N1) > 16 ||
DAG.ComputeMaxSignificantBits(N0) > 16)
return SDValue();
// At least one of the elements must be zero in the upper 17 bits, or can be
// safely made zero without altering the final result.
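// PMADDWD computes lo16(a)*lo16(b) + hi16(a)*hi16(b) per i32 lane using
// signed i16 multiplies. When an operand has its upper 17 bits clear, the
// hi16 product is zero and its lo16 half is non-negative, so (together with
// the 16 significant-bit check above) the remaining 16x16 product equals the
// full 32-bit multiply of the lane.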
auto GetZeroableOp = [&](SDValue Op) {
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (DAG.MaskedValueIsZero(Op, Mask17))
return Op;
// Mask off upper 16-bits of sign-extended constants.
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
return DAG.getNode(ISD::AND, SDLoc(N), VT, Op,
DAG.getConstant(0xFFFF, SDLoc(N), VT));
if (Op.getOpcode() == ISD::SIGN_EXTEND && N->isOnlyUserOf(Op.getNode())) {
SDValue Src = Op.getOperand(0);
// Convert sext(vXi16) to zext(vXi16).
if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128)
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Src);
// Convert sext(vXi8) to zext(vXi16 sext(vXi8)) on pre-SSE41 targets
// which will expand the extension.
if (Src.getScalarValueSizeInBits() < 16 && !Subtarget.hasSSE41()) {
EVT ExtVT = VT.changeVectorElementType(MVT::i16);
Src = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), ExtVT, Src);
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Src);
}
}
// Convert SIGN_EXTEND_VECTOR_INREG to ZERO_EXTEND_VECTOR_INREG.
if (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
N->isOnlyUserOf(Op.getNode())) {
SDValue Src = Op.getOperand(0);
if (Src.getScalarValueSizeInBits() == 16)
return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT, Src);
}
// Convert VSRAI(Op, 16) to VSRLI(Op, 16).
if (Op.getOpcode() == X86ISD::VSRAI && Op.getConstantOperandVal(1) == 16 &&
N->isOnlyUserOf(Op.getNode())) {
return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, Op.getOperand(0),
Op.getOperand(1));
}
return SDValue();
};
SDValue ZeroN0 = GetZeroableOp(N0);
SDValue ZeroN1 = GetZeroableOp(N1);
if (!ZeroN0 && !ZeroN1)
return SDValue();
N0 = ZeroN0 ? ZeroN0 : N0;
N1 = ZeroN1 ? ZeroN1 : N1;
// Use SplitOpsAndApply to handle AVX splitting.
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
MVT ResVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
MVT OpVT = MVT::getVectorVT(MVT::i16, Ops[0].getValueSizeInBits() / 16);
return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT,
DAG.getBitcast(OpVT, Ops[0]),
DAG.getBitcast(OpVT, Ops[1]));
};
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {N0, N1},
PMADDWDBuilder);
}
static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2())
return SDValue();
EVT VT = N->getValueType(0);
// Only support vXi64 vectors.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 ||
VT.getVectorNumElements() < 2 ||
!isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// PMULDQ returns the 64-bit result of the signed multiplication of the lower
// 32 bits of each operand. We can lower with this if the sign bits stretch
// that far.
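// PMULDQ multiplies the low 32 bits of each 64-bit lane as signed values; if
// both operands have more than 32 sign bits they are sign extensions of
// their low halves, so the 32x32->64 product equals the full 64-bit multiply.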
if (Subtarget.hasSSE41() && DAG.ComputeNumSignBits(N0) > 32 &&
DAG.ComputeNumSignBits(N1) > 32) {
auto PMULDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::PMULDQ, DL, Ops[0].getValueType(), Ops);
};
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 },
PMULDQBuilder, /*CheckBWI*/false);
}
// If the upper bits are zero we can use a single pmuludq.
APInt Mask = APInt::getHighBitsSet(64, 32);
if (DAG.MaskedValueIsZero(N0, Mask) && DAG.MaskedValueIsZero(N1, Mask)) {
auto PMULUDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::PMULUDQ, DL, Ops[0].getValueType(), Ops);
};
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 },
PMULUDQBuilder, /*CheckBWI*/false);
}
return SDValue();
}
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (SDValue V = combineMulToPMADDWD(N, DAG, Subtarget))
return V;
if (SDValue V = combineMulToPMULDQ(N, DAG, Subtarget))
return V;
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
// Optimize a single multiply with constant into two operations in order to
// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
if (!MulConstantOptimization)
return SDValue();
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
if (VT != MVT::i64 && VT != MVT::i32)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
if (isPowerOf2_64(C->getZExtValue()))
return SDValue();
int64_t SignMulAmt = C->getSExtValue();
assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
SDLoc DL(N);
if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(AbsMulAmt, DL, VT));
if (SignMulAmt < 0)
NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
NewMul);
return NewMul;
}
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
if ((AbsMulAmt % 9) == 0) {
MulAmt1 = 9;
MulAmt2 = AbsMulAmt / 9;
} else if ((AbsMulAmt % 5) == 0) {
MulAmt1 = 5;
MulAmt2 = AbsMulAmt / 5;
} else if ((AbsMulAmt % 3) == 0) {
MulAmt1 = 3;
MulAmt2 = AbsMulAmt / 3;
}
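// e.g. AbsMulAmt == 45 factors as 9 * 5 (two LEAs); AbsMulAmt == 40 factors
// as 5 * 8 (an LEA plus a shift).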
SDValue NewMul;
// For negative multiply amounts, only allow MulAmt2 to be a power of 2.
if (MulAmt2 &&
(isPowerOf2_64(MulAmt2) ||
(SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
if (isPowerOf2_64(MulAmt2) &&
!(SignMulAmt >= 0 && N->hasOneUse() &&
N->use_begin()->getOpcode() == ISD::ADD))
// If the second multiplier is pow2, issue it first. We want the multiply by
// 3, 5, or 9 to be folded into the addressing mode unless the lone use
// is an add. Only do this for positive multiply amounts since the
// negate would prevent it from being used as an address mode anyway.
std::swap(MulAmt1, MulAmt2);
if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(MulAmt1, DL, VT));
if (isPowerOf2_64(MulAmt2))
NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
// Negate the result.
if (SignMulAmt < 0)
NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
NewMul);
} else if (!Subtarget.slowLEA())
NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
if (!NewMul) {
assert(C->getZExtValue() != 0 &&
C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
if (isPowerOf2_64(AbsMulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(
ISD::ADD, DL, VT, N->getOperand(0),
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(AbsMulAmt - 1), DL,
MVT::i8)));
// To negate, subtract the number from zero
if (SignMulAmt < 0)
NewMul = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), NewMul);
} else if (isPowerOf2_64(AbsMulAmt + 1)) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(AbsMulAmt + 1),
DL, MVT::i8));
// To negate, reverse the operands of the subtract.
if (SignMulAmt < 0)
NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul);
else
NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));
} else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2)) {
// (mul x, 2^N + 2) => (add (shl x, N), (add x, x))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(AbsMulAmt - 2),
DL, MVT::i8));
NewMul = DAG.getNode(
ISD::ADD, DL, VT, NewMul,
DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), N->getOperand(0)));
} else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2)) {
// (mul x, 2^N - 2) => (sub (shl x, N), (add x, x))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(AbsMulAmt + 2),
DL, MVT::i8));
NewMul = DAG.getNode(
ISD::SUB, DL, VT, NewMul,
DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), N->getOperand(0)));
}
}
return NewMul;
}
// Try to form a MULHU or MULHS node by looking for
// (srl (mul ext, ext), 16)
// TODO: This is X86 specific because we want to be able to handle wide types
// before type legalization. But we can only do it if the vector will be
// legalized via widening/splitting. Type legalization can't handle promotion
// of a MULHU/MULHS. There isn't a way to convey this to the generic DAG
// combiner.
static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
"SRL or SRA node is required here!");
SDLoc DL(N);
if (!Subtarget.hasSSE2())
return SDValue();
// The operation feeding into the shift must be a multiply.
SDValue ShiftOperand = N->getOperand(0);
if (ShiftOperand.getOpcode() != ISD::MUL || !ShiftOperand.hasOneUse())
return SDValue();
// Input type should be at least vXi32.
EVT VT = N->getValueType(0);
if (!VT.isVector() || VT.getVectorElementType().getSizeInBits() < 32)
return SDValue();
// Need a shift by 16.
APInt ShiftAmt;
if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), ShiftAmt) ||
ShiftAmt != 16)
return SDValue();
SDValue LHS = ShiftOperand.getOperand(0);
SDValue RHS = ShiftOperand.getOperand(1);
unsigned ExtOpc = LHS.getOpcode();
if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) ||
RHS.getOpcode() != ExtOpc)
return SDValue();
// Peek through the extends.
LHS = LHS.getOperand(0);
RHS = RHS.getOperand(0);
// Ensure the input types match.
EVT MulVT = LHS.getValueType();
if (MulVT.getVectorElementType() != MVT::i16 || RHS.getValueType() != MulVT)
return SDValue();
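// The extended 16x16 multiply fits in 32 bits, so shifting it right by 16
// extracts the high half of the i16 product: MULHU for zero-extended inputs
// and MULHS for sign-extended inputs.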
unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
SDValue Mulh = DAG.getNode(Opc, DL, MulVT, LHS, RHS);
ExtOpc = N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, DL, VT, Mulh);
}
static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zero's or all ones.
if (VT.isInteger() && !VT.isVector() &&
N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
APInt Mask = N0.getConstantOperandAPInt(1);
Mask <<= N1C->getAPIntValue();
bool MaskOK = false;
// We can handle cases concerning bit-widening nodes containing setcc_c if
// we carefully interrogate the mask to make sure the transform is
// semantics-preserving.
// The transform is not safe if the result of C1 << C2 exceeds the bitwidth
// of the underlying setcc_c operation if the setcc_c was zero extended.
// Consider the following example:
// zext(setcc_c) -> i32 0x0000FFFF
// c1 -> i32 0x0000FFFF
// c2 -> i32 0x00000001
// (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE
// (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = true;
} else if (N00.getOpcode() == ISD::SIGN_EXTEND &&
N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = true;
} else if ((N00.getOpcode() == ISD::ZERO_EXTEND ||
N00.getOpcode() == ISD::ANY_EXTEND) &&
N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits());
}
if (MaskOK && Mask != 0) {
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT));
}
}
return SDValue();
}
static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Size = VT.getSizeInBits();
if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
return V;
// fold (ashr (shl a, [56,48,32,24,16]), SarConst)
// into (shl (sext_inreg a), [56,48,32,24,16] - SarConst) or
// into (ashr (sext_inreg a), SarConst - [56,48,32,24,16])
// depending on the sign of (SarConst - [56,48,32,24,16]).
// sexts in X86 are MOVs. The MOVs have the same code size
// as the above SHIFTs (only a shift by 1 has a smaller encoding).
// However the MOVs have 2 advantages over a SHIFT:
// 1. MOVs can write to a register that differs from the source.
// 2. MOVs accept memory operands.
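// e.g. on i32: (ashr (shl X, 24), 25) -> (ashr (sext_inreg X, i8), 1)
//              (ashr (shl X, 24), 22) -> (shl (sext_inreg X, i8), 2)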
if (VT.isVector() || N1.getOpcode() != ISD::Constant ||
N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
N0.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
EVT CVT = N1.getValueType();
if (SarConst.isNegative())
return SDValue();
for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
unsigned ShiftSize = SVT.getSizeInBits();
// Skip types without a corresponding sext/zext and ShlConst values that are
// not one of [56,48,32,24,16].
if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
continue;
SDLoc DL(N);
SDValue NN =
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
SarConst = SarConst - (Size - ShiftSize);
if (SarConst == 0)
return NN;
if (SarConst.isNegative())
return DAG.getNode(ISD::SHL, DL, VT, NN,
DAG.getConstant(-SarConst, DL, CVT));
return DAG.getNode(ISD::SRA, DL, VT, NN,
DAG.getConstant(SarConst, DL, CVT));
}
return SDValue();
}
static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
return V;
// Only do this on the last DAG combine as it can interfere with other
// combines.
if (!DCI.isAfterLegalizeDAG())
return SDValue();
// Try to improve a sequence of srl (and X, C1), C2 by inverting the order.
// TODO: This is a generic DAG combine that became an x86-only combine to
// avoid shortcomings in other folds such as bswap, bit-test ('bt'), and
// and-not ('andn').
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
auto *ShiftC = dyn_cast<ConstantSDNode>(N1);
auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!ShiftC || !AndC)
return SDValue();
// If we can shrink the constant mask to below 8 bits or 32 bits, then this
// transform should reduce code size. It may also enable secondary transforms
// from improved known-bits analysis or instruction selection.
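// e.g. (srl (and X, 0x3f80), 7) -> (and (srl X, 7), 0x7f) shrinks a 15-bit
// immediate mask to an 8-bit one.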
APInt MaskVal = AndC->getAPIntValue();
// If this can be matched by a zero extend, don't optimize.
if (MaskVal.isMask()) {
unsigned TO = MaskVal.countTrailingOnes();
if (TO >= 8 && isPowerOf2_32(TO))
return SDValue();
}
APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
unsigned OldMaskSize = MaskVal.getMinSignedBits();
unsigned NewMaskSize = NewMaskVal.getMinSignedBits();
if ((OldMaskSize > 8 && NewMaskSize <= 8) ||
(OldMaskSize > 32 && NewMaskSize <= 32)) {
// srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)
SDLoc DL(N);
SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);
SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask);
}
return SDValue();
}
static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
assert(isHorizOp(Opcode) && "Unexpected hadd/hsub/pack opcode");
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT SrcVT = N0.getValueType();
SDValue BC0 =
N->isOnlyUserOf(N0.getNode()) ? peekThroughOneUseBitcasts(N0) : N0;
SDValue BC1 =
N->isOnlyUserOf(N1.getNode()) ? peekThroughOneUseBitcasts(N1) : N1;
// Attempt to fold HOP(LOSUBVECTOR(SHUFFLE(X)),HISUBVECTOR(SHUFFLE(X)))
// to SHUFFLE(HOP(LOSUBVECTOR(X),HISUBVECTOR(X))). This is mainly for
// truncation trees that help us avoid lane-crossing shuffles.
// TODO: There's a lot more we can do for PACK/HADD style shuffle combines.
// TODO: We don't handle vXf64 shuffles yet.
if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {
if (SDValue BCSrc = getSplitVectorSrc(BC0, BC1, false)) {
SmallVector<SDValue> ShuffleOps;
SmallVector<int> ShuffleMask, ScaledMask;
SDValue Vec = peekThroughBitcasts(BCSrc);
if (getTargetShuffleInputs(Vec, ShuffleOps, ShuffleMask, DAG)) {
resolveTargetShuffleInputsAndMask(ShuffleOps, ShuffleMask);
// To keep the HOP LHS/RHS coherency, we must be able to scale the unary
// shuffle to a v4X64 width - we can probably relax this in the future.
if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
ShuffleOps[0].getValueType().is256BitVector() &&
scaleShuffleElements(ShuffleMask, 4, ScaledMask)) {
SDValue Lo, Hi;
MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
std::tie(Lo, Hi) = DAG.SplitVector(ShuffleOps[0], DL);
Lo = DAG.getBitcast(SrcVT, Lo);
Hi = DAG.getBitcast(SrcVT, Hi);
SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
Res = DAG.getBitcast(ShufVT, Res);
Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ScaledMask);
return DAG.getBitcast(VT, Res);
}
}
}
}
// Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(Z,W)) -> SHUFFLE(HOP()).
if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {
// If either/both ops are a shuffle that can scale to v2x64,
// then see if we can perform this as a v4x32 post shuffle.
SmallVector<SDValue> Ops0, Ops1;
SmallVector<int> Mask0, Mask1, ScaledMask0, ScaledMask1;
bool IsShuf0 =
getTargetShuffleInputs(BC0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) &&
scaleShuffleElements(Mask0, 2, ScaledMask0) &&
all_of(Ops0, [](SDValue Op) { return Op.getValueSizeInBits() == 128; });
bool IsShuf1 =
getTargetShuffleInputs(BC1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) &&
scaleShuffleElements(Mask1, 2, ScaledMask1) &&
all_of(Ops1, [](SDValue Op) { return Op.getValueSizeInBits() == 128; });
if (IsShuf0 || IsShuf1) {
if (!IsShuf0) {
Ops0.assign({BC0});
ScaledMask0.assign({0, 1});
}
if (!IsShuf1) {
Ops1.assign({BC1});
ScaledMask1.assign({0, 1});
}
SDValue LHS, RHS;
int PostShuffle[4] = {-1, -1, -1, -1};
auto FindShuffleOpAndIdx = [&](int M, int &Idx, ArrayRef<SDValue> Ops) {
if (M < 0)
return true;
Idx = M % 2;
SDValue Src = Ops[M / 2];
if (!LHS || LHS == Src) {
LHS = Src;
return true;
}
if (!RHS || RHS == Src) {
Idx += 2;
RHS = Src;
return true;
}
return false;
};
if (FindShuffleOpAndIdx(ScaledMask0[0], PostShuffle[0], Ops0) &&
FindShuffleOpAndIdx(ScaledMask0[1], PostShuffle[1], Ops0) &&
FindShuffleOpAndIdx(ScaledMask1[0], PostShuffle[2], Ops1) &&
FindShuffleOpAndIdx(ScaledMask1[1], PostShuffle[3], Ops1)) {
LHS = DAG.getBitcast(SrcVT, LHS);
RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS);
MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);
Res = DAG.getBitcast(ShufVT, Res);
Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, PostShuffle);
return DAG.getBitcast(VT, Res);
}
}
}
// Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y)).
if (VT.is256BitVector() && Subtarget.hasInt256()) {
SmallVector<int> Mask0, Mask1;
SmallVector<SDValue> Ops0, Ops1;
SmallVector<int, 2> ScaledMask0, ScaledMask1;
if (getTargetShuffleInputs(BC0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) &&
getTargetShuffleInputs(BC1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) &&
!Ops0.empty() && !Ops1.empty() &&
all_of(Ops0,
[](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
all_of(Ops1,
[](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
scaleShuffleElements(Mask0, 2, ScaledMask0) &&
scaleShuffleElements(Mask1, 2, ScaledMask1)) {
SDValue Op00 = peekThroughBitcasts(Ops0.front());
SDValue Op10 = peekThroughBitcasts(Ops1.front());
SDValue Op01 = peekThroughBitcasts(Ops0.back());
SDValue Op11 = peekThroughBitcasts(Ops1.back());
if ((Op00 == Op11) && (Op01 == Op10)) {
std::swap(Op10, Op11);
ShuffleVectorSDNode::commuteMask(ScaledMask1);
}
if ((Op00 == Op10) && (Op01 == Op11)) {
const int Map[4] = {0, 2, 1, 3};
SmallVector<int, 4> ShuffleMask(
{Map[ScaledMask0[0]], Map[ScaledMask1[0]], Map[ScaledMask0[1]],
Map[ScaledMask1[1]]});
MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
SDValue Res = DAG.getNode(Opcode, DL, VT, DAG.getBitcast(SrcVT, Op00),
DAG.getBitcast(SrcVT, Op01));
Res = DAG.getBitcast(ShufVT, Res);
Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ShuffleMask);
return DAG.getBitcast(VT, Res);
}
}
}
return SDValue();
}
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
"Unexpected pack opcode");
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned NumDstElts = VT.getVectorNumElements();
unsigned DstBitsPerElt = VT.getScalarSizeInBits();
unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt &&
N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
"Unexpected PACKSS/PACKUS input type");
bool IsSigned = (X86ISD::PACKSS == Opcode);
// Constant Folding.
APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1;
if ((N0.isUndef() || N->isOnlyUserOf(N0.getNode())) &&
(N1.isUndef() || N->isOnlyUserOf(N1.getNode())) &&
getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumSrcElts = NumDstElts / 2;
unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
APInt Undefs(NumDstElts, 0);
SmallVector<APInt, 32> Bits(NumDstElts, APInt::getZero(DstBitsPerElt));
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0);
auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0);
if (UndefElts[SrcIdx]) {
Undefs.setBit(Lane * NumDstEltsPerLane + Elt);
continue;
}
APInt &Val = EltBits[SrcIdx];
if (IsSigned) {
// PACKSS: Truncate signed value with signed saturation.
// Source values less than dst minint are saturated to minint.
// Source values greater than dst maxint are saturated to maxint.
if (Val.isSignedIntN(DstBitsPerElt))
Val = Val.trunc(DstBitsPerElt);
else if (Val.isNegative())
Val = APInt::getSignedMinValue(DstBitsPerElt);
else
Val = APInt::getSignedMaxValue(DstBitsPerElt);
} else {
// PACKUS: Truncate signed value with unsigned saturation.
// Source values less than zero are saturated to zero.
// Source values greater than dst maxuint are saturated to maxuint.
if (Val.isIntN(DstBitsPerElt))
Val = Val.trunc(DstBitsPerElt);
else if (Val.isNegative())
Val = APInt::getZero(DstBitsPerElt);
else
Val = APInt::getAllOnes(DstBitsPerElt);
}
Bits[Lane * NumDstEltsPerLane + Elt] = Val;
}
}
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
}
// Try to fold PACK(SHUFFLE(),SHUFFLE()) -> SHUFFLE(PACK()).
if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
return V;
// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
// truncate to create a larger truncate.
if (Subtarget.hasAVX512() &&
N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
N0.getOperand(0).getValueType() == MVT::v8i32) {
if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
(!IsSigned &&
DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
if (Subtarget.hasVLX())
return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
// Widen input to v16i32 so we can truncate that.
SDLoc dl(N);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32,
N0.getOperand(0), DAG.getUNDEF(MVT::v8i32));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat);
}
}
// Try to fold PACK(EXTEND(X),EXTEND(Y)) -> CONCAT(X,Y) subvectors.
if (VT.is128BitVector()) {
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Src0, Src1;
if (N0.getOpcode() == ExtOpc &&
N0.getOperand(0).getValueType().is64BitVector() &&
N0.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
Src0 = N0.getOperand(0);
}
if (N1.getOpcode() == ExtOpc &&
N1.getOperand(0).getValueType().is64BitVector() &&
N1.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
Src1 = N1.getOperand(0);
}
if ((Src0 || N0.isUndef()) && (Src1 || N1.isUndef())) {
assert((Src0 || Src1) && "Found PACK(UNDEF,UNDEF)");
Src0 = Src0 ? Src0 : DAG.getUNDEF(Src1.getValueType());
Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);
}
// Try again with pack(*_extend_vector_inreg, undef).
unsigned VecInRegOpc = IsSigned ? ISD::SIGN_EXTEND_VECTOR_INREG
: ISD::ZERO_EXTEND_VECTOR_INREG;
if (N0.getOpcode() == VecInRegOpc && N1.isUndef() &&
N0.getOperand(0).getScalarValueSizeInBits() < DstBitsPerElt)
return getEXTEND_VECTOR_INREG(ExtOpc, SDLoc(N), VT, N0.getOperand(0),
DAG);
}
// Attempt to combine as shuffle.
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
return SDValue();
}
static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert((X86ISD::HADD == N->getOpcode() || X86ISD::FHADD == N->getOpcode() ||
X86ISD::HSUB == N->getOpcode() || X86ISD::FHSUB == N->getOpcode()) &&
"Unexpected horizontal add/sub opcode");
if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
MVT VT = N->getSimpleValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y))).
if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
LHS.getOpcode() == RHS.getOpcode() &&
LHS.getValueType() == RHS.getValueType() &&
N->isOnlyUserOf(LHS.getNode()) && N->isOnlyUserOf(RHS.getNode())) {
SDValue LHS0 = LHS.getOperand(0);
SDValue LHS1 = LHS.getOperand(1);
SDValue RHS0 = RHS.getOperand(0);
SDValue RHS1 = RHS.getOperand(1);
if ((LHS0 == LHS1 || LHS0.isUndef() || LHS1.isUndef()) &&
(RHS0 == RHS1 || RHS0.isUndef() || RHS1.isUndef())) {
SDLoc DL(N);
SDValue Res = DAG.getNode(LHS.getOpcode(), DL, LHS.getValueType(),
LHS0.isUndef() ? LHS1 : LHS0,
RHS0.isUndef() ? RHS1 : RHS0);
MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
Res = DAG.getBitcast(ShufVT, Res);
SDValue NewLHS =
DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res,
getV4X86ShuffleImm8ForMask({0, 1, 0, 1}, DL, DAG));
SDValue NewRHS =
DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res,
getV4X86ShuffleImm8ForMask({2, 3, 2, 3}, DL, DAG));
return DAG.getNode(N->getOpcode(), DL, VT, DAG.getBitcast(VT, NewLHS),
DAG.getBitcast(VT, NewRHS));
}
}
}
// Try to fold HOP(SHUFFLE(),SHUFFLE()) -> SHUFFLE(HOP()).
if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
return V;
return SDValue();
}
static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
X86ISD::VSRL == N->getOpcode()) &&
"Unexpected shift opcode");
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Shift zero -> zero.
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
// Detect constant shift amounts.
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
if (getTargetConstantBitsFromNode(N1, 64, UndefElts, EltBits, true, false)) {
unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false);
return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0,
EltBits[0].getZExtValue(), DAG);
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
}
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode ||
X86ISD::VSRLI == Opcode) &&
"Unexpected shift opcode");
bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
assert(N->getOperand(1).getValueType() == MVT::i8 &&
"Unexpected shift amount type");
// (shift undef, X) -> 0
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
unsigned ShiftVal = N->getConstantOperandVal(1);
if (ShiftVal >= NumBitsPerElt) {
if (LogicalShift)
return DAG.getConstant(0, SDLoc(N), VT);
ShiftVal = NumBitsPerElt - 1;
}
// (shift X, 0) -> X
if (!ShiftVal)
return N0;
// (shift 0, C) -> 0
if (ISD::isBuildVectorAllZeros(N0.getNode()))
// N0 is all zeros or undef. We guarantee that the bits shifted into the
// result are all zeros, not undef.
return DAG.getConstant(0, SDLoc(N), VT);
// (VSRAI -1, C) -> -1
if (!LogicalShift && ISD::isBuildVectorAllOnes(N0.getNode()))
// N0 is all ones or undef. We guarantee that the bits shifted into the
// result are all ones, not undef.
return DAG.getConstant(-1, SDLoc(N), VT);
auto MergeShifts = [&](SDValue X, uint64_t Amt0, uint64_t Amt1) {
unsigned NewShiftVal = Amt0 + Amt1;
if (NewShiftVal >= NumBitsPerElt) {
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
if (LogicalShift)
return DAG.getConstant(0, SDLoc(N), VT);
NewShiftVal = NumBitsPerElt - 1;
}
return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0),
DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8));
};
// (shift (shift X, C2), C1) -> (shift X, (C1 + C2))
if (Opcode == N0.getOpcode())
return MergeShifts(N0.getOperand(0), ShiftVal, N0.getConstantOperandVal(1));
// (shl (add X, X), C) -> (shl X, (C + 1))
if (Opcode == X86ISD::VSHLI && N0.getOpcode() == ISD::ADD &&
N0.getOperand(0) == N0.getOperand(1))
return MergeShifts(N0.getOperand(0), ShiftVal, 1);
// We can decode 'whole byte' logical bit shifts as shuffles.
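// (A shift by a multiple of 8 bits moves whole bytes within each element, so
// it can be expressed as a byte shuffle that fills the vacated bytes with
// zero.)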
if (LogicalShift && (ShiftVal % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
// Constant Folding.
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
if (N->isOnlyUserOf(N0.getNode()) &&
getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
// Undef elements need to fold to 0. It's possible SimplifyDemandedBits
// created an undef input due to no input bits being demanded, but the user
// still expects 0 in other bits.
for (unsigned i = 0, e = EltBits.size(); i != e; ++i) {
APInt &Elt = EltBits[i];
if (UndefElts[i])
Elt = 0;
else if (X86ISD::VSHLI == Opcode)
Elt <<= ShiftVal;
else if (X86ISD::VSRAI == Opcode)
Elt.ashrInPlace(ShiftVal);
else
Elt.lshrInPlace(ShiftVal);
}
// Reset undef elements since they were zeroed above.
UndefElts = 0;
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBitsPerElt),
DCI))
return SDValue(N, 0);
return SDValue();
}
static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
assert(((Opcode == X86ISD::PINSRB && VT == MVT::v16i8) ||
(Opcode == X86ISD::PINSRW && VT == MVT::v8i16) ||
Opcode == ISD::INSERT_VECTOR_ELT) &&
"Unexpected vector insertion");
// Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt).
if (Opcode == ISD::INSERT_VECTOR_ELT && N->getOperand(0).isUndef() &&
isNullConstant(N->getOperand(2)))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, N->getOperand(1));
if (Opcode == X86ISD::PINSRB || Opcode == X86ISD::PINSRW) {
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0),
APInt::getAllOnes(NumBitsPerElt), DCI))
return SDValue(N, 0);
}
// Attempt to combine insertion patterns to a shuffle.
if (VT.isSimple() && DCI.isAfterLegalizeDAG()) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
return SDValue();
}
/// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs
/// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for
/// OR -> CMPNEQSS.
static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned opcode;
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0.getOperand(1);
SDValue CMP1 = N1.getOperand(1);
SDLoc DL(N);
// The SETCCs should both refer to the same CMP.
if (CMP0.getOpcode() != X86ISD::FCMP || CMP0 != CMP1)
return SDValue();
SDValue CMP00 = CMP0->getOperand(0);
SDValue CMP01 = CMP0->getOperand(1);
EVT VT = CMP00.getValueType();
if (VT == MVT::f32 || VT == MVT::f64 ||
(VT == MVT::f16 && Subtarget.hasFP16())) {
bool ExpectingFlags = false;
// Check for any users that want flags:
for (const SDNode *U : N->uses()) {
if (ExpectingFlags)
break;
switch (U->getOpcode()) {
default:
case ISD::BR_CC:
case ISD::BRCOND:
case ISD::SELECT:
ExpectingFlags = true;
break;
case ISD::CopyToReg:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
break;
}
}
if (!ExpectingFlags) {
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
X86::CondCode tmp = cc0;
cc0 = cc1;
cc1 = tmp;
}
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
if (Subtarget.hasAVX512()) {
SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
DAG.getTargetConstant(x86cc, DL, MVT::i8));
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
DAG.getConstant(0, DL, MVT::v16i1),
FSetCC, DAG.getIntPtrConstant(0, DL));
return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL,
N->getSimpleValueType(0));
}
SDValue OnesOrZeroesF =
DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00,
CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8));
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
if (is64BitFP && !Subtarget.is64Bit()) {
// On a 32-bit target, we cannot bitcast the 64-bit float to a
// 64-bit integer, since that's not a legal type. Since
// OnesOrZeroesF is all ones or all zeroes, we don't need all the
// bits, but can do this little dance to extract the lowest 32 bits
// and work with those going forward.
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
OnesOrZeroesF);
SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64);
OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
Vector32, DAG.getIntPtrConstant(0, DL));
IntVT = MVT::i32;
}
SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, DL, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
ANDed);
return OneBitOfTruth;
}
}
}
}
return SDValue();
}
/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
static SDValue combineAndNotIntoANDNP(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP");
MVT VT = N->getSimpleValueType(0);
if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
SDValue X, Y;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue Not = IsNOT(N0, DAG)) {
X = Not;
Y = N1;
} else if (SDValue Not = IsNOT(N1, DAG)) {
X = Not;
Y = N0;
} else
return SDValue();
X = DAG.getBitcast(VT, X);
Y = DAG.getBitcast(VT, Y);
return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
}
/// Try to fold:
/// and (vector_shuffle<Z,...,Z>
/// (insert_vector_elt undef, (xor X, -1), Z), undef), Y
/// ->
/// andnp (vector_shuffle<Z,...,Z>
/// (insert_vector_elt undef, X, Z), undef), Y
static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP");
EVT VT = N->getValueType(0);
// Do not split 256- and 512-bit vectors with SSE2, as doing so would overwrite
// the original value and require extra moves.
if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||
((VT.is256BitVector() || VT.is512BitVector()) && Subtarget.hasAVX())))
return SDValue();
auto GetNot = [&DAG](SDValue V) {
auto *SVN = dyn_cast<ShuffleVectorSDNode>(peekThroughOneUseBitcasts(V));
// TODO: SVN->hasOneUse() is a strong condition. It can be relaxed if all
// end-users are ISD::AND including cases
// (and(extract_vector_element(SVN), Y)).
if (!SVN || !SVN->hasOneUse() || !SVN->isSplat() ||
!SVN->getOperand(1).isUndef()) {
return SDValue();
}
SDValue IVEN = SVN->getOperand(0);
if (IVEN.getOpcode() != ISD::INSERT_VECTOR_ELT ||
!IVEN.getOperand(0).isUndef() || !IVEN.hasOneUse())
return SDValue();
if (!isa<ConstantSDNode>(IVEN.getOperand(2)) ||
IVEN.getConstantOperandAPInt(2) != SVN->getSplatIndex())
return SDValue();
SDValue Src = IVEN.getOperand(1);
if (SDValue Not = IsNOT(Src, DAG)) {
SDValue NotSrc = DAG.getBitcast(Src.getValueType(), Not);
SDValue NotIVEN =
DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(IVEN), IVEN.getValueType(),
IVEN.getOperand(0), NotSrc, IVEN.getOperand(2));
return DAG.getVectorShuffle(SVN->getValueType(0), SDLoc(SVN), NotIVEN,
SVN->getOperand(1), SVN->getMask());
}
return SDValue();
};
SDValue X, Y;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue Not = GetNot(N0)) {
X = Not;
Y = N1;
} else if (SDValue Not = GetNot(N1)) {
X = Not;
Y = N0;
} else
return SDValue();
X = DAG.getBitcast(VT, X);
Y = DAG.getBitcast(VT, Y);
SDLoc DL(N);
// We do not split for SSE at all, but we need to split vectors for AVX1 and
// AVX2.
if (!Subtarget.useAVX512Regs() && VT.is512BitVector()) {
SDValue LoX, HiX;
std::tie(LoX, HiX) = splitVector(X, DAG, DL);
SDValue LoY, HiY;
std::tie(LoY, HiY) = splitVector(Y, DAG, DL);
EVT SplitVT = LoX.getValueType();
SDValue LoV = DAG.getNode(X86ISD::ANDNP, DL, SplitVT, {LoX, LoY});
SDValue HiV = DAG.getNode(X86ISD::ANDNP, DL, SplitVT, {HiX, HiY});
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, {LoV, HiV});
}
return DAG.getNode(X86ISD::ANDNP, DL, VT, {X, Y});
}
// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
// logical operations, like in the example below.
// or (and (truncate x), (truncate y)),
//    (xor (truncate z), (build_vector (constants)))
// Given a target type \p VT, we generate
// or (and x, y), (xor z, zext(build_vector (constants)))
// given that x, y and z are of type \p VT. We can do so if each operand is
// either a truncate from VT or can be recursively promoted; the second
// operand may also be a vector of constants.
static SDValue PromoteMaskArithmetic(SDNode *N, EVT VT, SelectionDAG &DAG,
unsigned Depth) {
// Limit recursion to avoid excessive compile times.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
if (N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND &&
N->getOpcode() != ISD::OR)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrPromote(N->getOpcode(), VT))
return SDValue();
if (SDValue NN0 = PromoteMaskArithmetic(N0.getNode(), VT, DAG, Depth + 1))
N0 = NN0;
else {
// The Left side has to be a trunc.
if (N0.getOpcode() != ISD::TRUNCATE)
return SDValue();
// The type of the truncated inputs.
if (N0.getOperand(0).getValueType() != VT)
return SDValue();
N0 = N0.getOperand(0);
}
if (SDValue NN1 = PromoteMaskArithmetic(N1.getNode(), VT, DAG, Depth + 1))
N1 = NN1;
else {
// The right side has to be a 'trunc' or a constant vector.
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getValueType() == VT;
if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
return SDValue();
if (RHSTrunc)
N1 = N1.getOperand(0);
else
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
}
return DAG.getNode(N->getOpcode(), DL, VT, N0, N1);
}
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
// register. In most cases we actually compare or select YMM-sized registers
// and mixing the two types creates horrible code. This method optimizes
// some of the transition sequences.
// Even with AVX-512 this is still useful for removing casts around logical
// operations on vXi1 mask types.
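// e.g. (sext (and (trunc X), (trunc Y))) becomes
// (sext_inreg (and X, Y), NarrowVT) on the wide type, removing the
// trunc/ext pair around the logic op.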
static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
assert(VT.isVector() && "Expected vector type");
SDLoc DL(N);
assert((N->getOpcode() == ISD::ANY_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
SDValue Narrow = N->getOperand(0);
EVT NarrowVT = Narrow.getValueType();
// Generate the wide operation.
SDValue Op = PromoteMaskArithmetic(Narrow.getNode(), VT, DAG, 0);
if (!Op)
return SDValue();
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND:
return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
case ISD::SIGN_EXTEND:
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
Op, DAG.getValueType(NarrowVT));
}
}
static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) {
unsigned FPOpcode;
switch (Opcode) {
default: llvm_unreachable("Unexpected input node for FP logic conversion");
case ISD::AND: FPOpcode = X86ISD::FAND; break;
case ISD::OR: FPOpcode = X86ISD::FOR; break;
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
}
return FPOpcode;
}
/// If both input operands of a logic op are being cast from floating-point
/// types or FP compares, try to convert this into a floating-point logic node
/// to avoid unnecessary moves from SSE to integer registers.
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
if (!((N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) ||
(N0.getOpcode() == ISD::SETCC && N1.getOpcode() == ISD::SETCC)))
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N10 = N1.getOperand(0);
EVT N00Type = N00.getValueType();
EVT N10Type = N10.getValueType();
// Ensure that both types are the same and are legal scalar fp types.
if (N00Type != N10Type || !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
(Subtarget.hasSSE2() && N00Type == MVT::f64) ||
(Subtarget.hasFP16() && N00Type == MVT::f16)))
return SDValue();
if (N0.getOpcode() == ISD::BITCAST && !DCI.isBeforeLegalizeOps()) {
unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
return DAG.getBitcast(VT, FPLogic);
}
if (VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || !N0.hasOneUse() ||
!N1.hasOneUse())
return SDValue();
ISD::CondCode CC0 = cast<CondCodeSDNode>(N0.getOperand(2))->get();
ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
// The vector ISA for FP predicates is incomplete before AVX, so converting
// COMIS* to CMPS* may not be a win before AVX.
if (!Subtarget.hasAVX() &&
!(cheapX86FSETCC_SSE(CC0) && cheapX86FSETCC_SSE(CC1)))
return SDValue();
// Convert scalar FP compares and logic to vector compares (COMIS* to CMPS*)
// and vector logic:
// logic (setcc N00, N01), (setcc N10, N11) -->
// extelt (logic (setcc (s2v N00), (s2v N01)), setcc (s2v N10), (s2v N11))), 0
unsigned NumElts = 128 / N00Type.getSizeInBits();
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), N00Type, NumElts);
EVT BoolVecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
SDValue ZeroIndex = DAG.getVectorIdxConstant(0, DL);
SDValue N01 = N0.getOperand(1);
SDValue N11 = N1.getOperand(1);
SDValue Vec00 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N00);
SDValue Vec01 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N01);
SDValue Vec10 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N10);
SDValue Vec11 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N11);
SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01, CC0);
SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11, CC1);
SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Logic, ZeroIndex);
}
// Attempt to fold BITOP(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(BITOP(X,Y))
// to reduce XMM->GPR traffic.
static SDValue combineBitOpWithMOVMSK(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) &&
"Unexpected bit opcode");
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Both operands must be single use MOVMSK.
if (N0.getOpcode() != X86ISD::MOVMSK || !N0.hasOneUse() ||
N1.getOpcode() != X86ISD::MOVMSK || !N1.hasOneUse())
return SDValue();
SDValue Vec0 = N0.getOperand(0);
SDValue Vec1 = N1.getOperand(0);
EVT VecVT0 = Vec0.getValueType();
EVT VecVT1 = Vec1.getValueType();
// Both MOVMSK operands must be from vectors of the same size and same element
// size, but it's OK if one is floating-point and the other is integer.
if (VecVT0.getSizeInBits() != VecVT1.getSizeInBits() ||
VecVT0.getScalarSizeInBits() != VecVT1.getScalarSizeInBits())
return SDValue();
SDLoc DL(N);
unsigned VecOpc =
VecVT0.isFloatingPoint() ? convertIntLogicToFPLogicOpcode(Opc) : Opc;
SDValue Result =
DAG.getNode(VecOpc, DL, VecVT0, Vec0, DAG.getBitcast(VecVT0, Vec1));
return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
}
// Attempt to fold BITOP(SHIFT(X,Z),SHIFT(Y,Z)) -> SHIFT(BITOP(X,Y),Z).
// NOTE: This is a very limited case of what SimplifyUsingDistributiveLaws
// handles in InstCombine.
static SDValue combineBitOpWithShift(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) &&
"Unexpected bit opcode");
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// Both operands must be single use.
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
// Search for matching shifts.
SDValue BC0 = peekThroughOneUseBitcasts(N0);
SDValue BC1 = peekThroughOneUseBitcasts(N1);
unsigned BCOpc = BC0.getOpcode();
EVT BCVT = BC0.getValueType();
if (BCOpc != BC1->getOpcode() || BCVT != BC1.getValueType())
return SDValue();
switch (BCOpc) {
case X86ISD::VSHLI:
case X86ISD::VSRLI:
case X86ISD::VSRAI: {
if (BC0.getOperand(1) != BC1.getOperand(1))
return SDValue();
SDLoc DL(N);
SDValue BitOp =
DAG.getNode(Opc, DL, BCVT, BC0.getOperand(0), BC1.getOperand(0));
SDValue Shift = DAG.getNode(BCOpc, DL, BCVT, BitOp, BC0.getOperand(1));
return DAG.getBitcast(VT, Shift);
}
}
return SDValue();
}
/// If this is a zero/all-bits result that is bitwise-anded with a low-bits
/// mask (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
/// with a shift-right to eliminate loading the vector constant mask value.
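/// For example, if each element of X is known to be either 0 or -1 (all sign
/// bits), then 'and X, splat(1)' can be rewritten as a logical shift right by
/// EltBits-1, avoiding the load of the splat(1) constant.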
static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
EVT VT = Op0.getValueType();
if (VT != Op1.getValueType() || !VT.isSimple() || !VT.isInteger())
return SDValue();
// Try to convert an "is positive" signbit masking operation into arithmetic
// shift and "andn". This saves a materialization of a -1 vector constant.
// The "is negative" variant should be handled more generally because it only
// requires "and" rather than "andn":
// and (pcmpgt X, -1), Y --> pandn (vsrai X, BitWidth - 1), Y
//
// This is limited to the original type to avoid producing even more bitcasts.
// If the bitcasts can't be eliminated, then it is unlikely that this fold
// will be profitable.
if (N->getValueType(0) == VT &&
supportedVectorShiftWithImm(VT.getSimpleVT(), Subtarget, ISD::SRA)) {
SDValue X, Y;
if (Op1.hasOneUse() && Op1.getOpcode() == X86ISD::PCMPGT &&
isAllOnesOrAllOnesSplat(Op1.getOperand(1))) {
X = Op1.getOperand(0);
Y = Op0;
} else if (Op0.hasOneUse() && Op0.getOpcode() == X86ISD::PCMPGT &&
isAllOnesOrAllOnesSplat(Op0.getOperand(1))) {
X = Op0.getOperand(0);
Y = Op1;
}
if (X && Y) {
SDLoc DL(N);
SDValue Sra =
getTargetVShiftByConstNode(X86ISD::VSRAI, DL, VT.getSimpleVT(), X,
VT.getScalarSizeInBits() - 1, DAG);
return DAG.getNode(X86ISD::ANDNP, DL, VT, Sra, Y);
}
}
APInt SplatVal;
if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
!SplatVal.isMask())
return SDValue();
// Don't prevent creation of ANDN.
if (isBitwiseNot(Op0))
return SDValue();
if (!supportedVectorShiftWithImm(VT.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
unsigned EltBitWidth = VT.getScalarSizeInBits();
if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
unsigned ShiftVal = SplatVal.countTrailingOnes();
SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
// Get the index node from the lowered DAG of a GEP IR instruction with one
// indexing dimension.
static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) {
if (Ld->isIndexed())
return SDValue();
SDValue Base = Ld->getBasePtr();
if (Base.getOpcode() != ISD::ADD)
return SDValue();
SDValue ShiftedIndex = Base.getOperand(0);
if (ShiftedIndex.getOpcode() != ISD::SHL)
return SDValue();
return ShiftedIndex.getOperand(0);
}
static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
if (Subtarget.hasBMI2() && VT.isScalarInteger()) {
switch (VT.getSizeInBits()) {
default: return false;
case 64: return Subtarget.is64Bit() ? true : false;
case 32: return true;
}
}
return false;
}
// This function recognizes cases where the X86 bzhi instruction can replace an
// 'and-load' sequence.
// In the case of loading an integer value from an array of constants defined
// as follows:
//
// int array[SIZE] = {0x0, 0x1, 0x3, 0x7, 0xF ..., 2^(SIZE-1) - 1}
//
// and then applying a bitwise and on the result with another input,
// it is equivalent to performing bzhi (zero high bits) on the input, with the
// same index as the load.
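// For example, with a 32-bit element type and index 3: array[3] == 0x7, so
// 'x & array[3]' keeps only bits [2:0], which is exactly 'bzhi x, 3' (i.e.
// 'x & (0xFFFFFFFF >> (32 - 3))', the form emitted below).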
static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Node->getSimpleValueType(0);
SDLoc dl(Node);
// Check if subtarget has BZHI instruction for the node's type
if (!hasBZHI(Subtarget, VT))
return SDValue();
// Try matching the pattern for both operands.
for (unsigned i = 0; i < 2; i++) {
SDValue N = Node->getOperand(i);
LoadSDNode *Ld = dyn_cast<LoadSDNode>(N.getNode());
// Bail out if the operand is not a load instruction.
if (!Ld)
return SDValue();
const Value *MemOp = Ld->getMemOperand()->getValue();
if (!MemOp)
return SDValue();
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
Constant *Init = GV->getInitializer();
Type *Ty = Init->getType();
if (!isa<ConstantDataArray>(Init) ||
!Ty->getArrayElementType()->isIntegerTy() ||
Ty->getArrayElementType()->getScalarSizeInBits() !=
VT.getSizeInBits() ||
Ty->getArrayNumElements() >
Ty->getArrayElementType()->getScalarSizeInBits())
continue;
// Check if the array's constant elements are suitable to our case.
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
bool ConstantsMatch = true;
for (uint64_t j = 0; j < ArrayElementCount; j++) {
auto *Elem = cast<ConstantInt>(Init->getAggregateElement(j));
if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) {
ConstantsMatch = false;
break;
}
}
if (!ConstantsMatch)
continue;
// Do the transformation (For 32-bit type):
// -> (and (load arr[idx]), inp)
// <- (and (srl 0xFFFFFFFF, (sub 32, idx)))
// that will be replaced with one bzhi instruction.
SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0);
SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32);
// Get the Node which indexes into the array.
SDValue Index = getIndexFromUnindexedLoad(Ld);
if (!Index)
return SDValue();
Index = DAG.getZExtOrTrunc(Index, dl, MVT::i32);
SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index);
Sub = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Sub);
SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub);
return DAG.getNode(ISD::AND, dl, VT, Inp, LShr);
}
}
}
}
return SDValue();
}
// Look for (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef,))), C)
// where C is a mask containing the same number of bits as the setcc and
// where the setcc will freely zero the upper bits of the k-register. We can
// replace the undef in the concat with 0s and remove the AND. This mainly
// helps with v2i1/v4i1 setcc being cast to scalar.
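// For example, a v2i1 setcc concatenated with undefs up to v8i1, bitcast to
// i8 and masked with 0x3: once the undef lanes are replaced by zeros, bits
// [7:2] are known zero and the 'and 0x3' becomes redundant.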
static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
EVT VT = N->getValueType(0);
// Make sure this is an AND with constant. We will check the value of the
// constant later.
auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C1)
return SDValue();
// This is implied by the ConstantSDNode.
assert(!VT.isVector() && "Expected scalar VT!");
SDValue Src = N->getOperand(0);
if (!Src.hasOneUse())
return SDValue();
// (Optionally) peek through any_extend().
if (Src.getOpcode() == ISD::ANY_EXTEND) {
if (!Src.getOperand(0).hasOneUse())
return SDValue();
Src = Src.getOperand(0);
}
if (Src.getOpcode() != ISD::BITCAST || !Src.getOperand(0).hasOneUse())
return SDValue();
Src = Src.getOperand(0);
EVT SrcVT = Src.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::i1 ||
!TLI.isTypeLegal(SrcVT))
return SDValue();
if (Src.getOpcode() != ISD::CONCAT_VECTORS)
return SDValue();
// We only care about the first subvector of the concat; we expect the
// other subvectors to be ignored due to the AND if we make the change.
SDValue SubVec = Src.getOperand(0);
EVT SubVecVT = SubVec.getValueType();
// The RHS of the AND should be a mask with as many bits as SubVec.
if (!TLI.isTypeLegal(SubVecVT) ||
!C1->getAPIntValue().isMask(SubVecVT.getVectorNumElements()))
return SDValue();
// The first subvector should be a setcc with a legal result type or an
// AND containing at least one setcc with a legal result type.
auto IsLegalSetCC = [&](SDValue V) {
if (V.getOpcode() != ISD::SETCC)
return false;
EVT SetccVT = V.getOperand(0).getValueType();
if (!TLI.isTypeLegal(SetccVT) ||
!(Subtarget.hasVLX() || SetccVT.is512BitVector()))
return false;
if (!(Subtarget.hasBWI() || SetccVT.getScalarSizeInBits() >= 32))
return false;
return true;
};
if (!(IsLegalSetCC(SubVec) || (SubVec.getOpcode() == ISD::AND &&
(IsLegalSetCC(SubVec.getOperand(0)) ||
IsLegalSetCC(SubVec.getOperand(1))))))
return SDValue();
// We passed all the checks. Rebuild the concat_vectors with zeroes
// and cast it back to VT.
SDLoc dl(N);
SmallVector<SDValue, 4> Ops(Src.getNumOperands(),
DAG.getConstant(0, dl, SubVecVT));
Ops[0] = SubVec;
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT,
Ops);
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getSizeInBits());
return DAG.getZExtOrTrunc(DAG.getBitcast(IntVT, Concat), dl, VT);
}
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc dl(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is SSE1 only convert to FAND to avoid scalarization.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
return DAG.getBitcast(MVT::v4i32,
DAG.getNode(X86ISD::FAND, dl, MVT::v4f32,
DAG.getBitcast(MVT::v4f32, N0),
DAG.getBitcast(MVT::v4f32, N1)));
}
// Use a 32-bit and+zext if upper bits known zero.
if (VT == MVT::i64 && Subtarget.is64Bit() && !isa<ConstantSDNode>(N1)) {
APInt HiMask = APInt::getHighBitsSet(64, 32);
if (DAG.MaskedValueIsZero(N1, HiMask) ||
DAG.MaskedValueIsZero(N0, HiMask)) {
SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N0);
SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N1);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64,
DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS));
}
}
// Match all-of bool scalar reductions into a bitcast/movmsk + cmp.
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
SmallVector<SDValue, 2> SrcOps;
SmallVector<APInt, 2> SrcPartials;
if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps, &SrcPartials) &&
SrcOps.size() == 1) {
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
if (Mask) {
assert(SrcPartials[0].getBitWidth() == NumElts &&
"Unexpected partial reduction mask");
SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits);
return DAG.getSetCC(dl, MVT::i1, Mask, PartialBits, ISD::SETEQ);
}
}
}
if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
return V;
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
if (SDValue R = combineBitOpWithShift(N, DAG))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
if (SDValue R = combineAndShuffleNot(N, DAG, Subtarget))
return R;
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
if (SDValue R = combineAndNotIntoANDNP(N, DAG))
return R;
if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
return ShiftRight;
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
return R;
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
if (VT.isVector() && getTargetConstantFromNode(N1)) {
unsigned Opc0 = N0.getOpcode();
if ((Opc0 == ISD::MUL || Opc0 == ISD::MULHU || Opc0 == ISD::MULHS) &&
getTargetConstantFromNode(N0.getOperand(1)) &&
DAG.ComputeNumSignBits(N1) == VT.getScalarSizeInBits() &&
N0->hasOneUse() && N0.getOperand(1)->hasOneUse()) {
SDValue MaskMul = DAG.getNode(ISD::AND, dl, VT, N0.getOperand(1), N1);
return DAG.getNode(Opc0, dl, VT, N0.getOperand(0), MaskMul);
}
}
// Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y), COND_B) iff Y is not a constant;
// this avoids a slow variable shift (moving the shift amount to ECX etc.).
if (isOneConstant(N1) && N0->hasOneUse()) {
SDValue Src = N0;
while ((Src.getOpcode() == ISD::ZERO_EXTEND ||
Src.getOpcode() == ISD::TRUNCATE) &&
Src.getOperand(0)->hasOneUse())
Src = Src.getOperand(0);
bool ContainsNOT = false;
X86::CondCode X86CC = X86::COND_B;
// Peek through AND(NOT(SRL(X,Y)),1).
if (isBitwiseNot(Src)) {
Src = Src.getOperand(0);
X86CC = X86::COND_AE;
ContainsNOT = true;
}
if (Src.getOpcode() == ISD::SRL &&
!isa<ConstantSDNode>(Src.getOperand(1))) {
SDValue BitNo = Src.getOperand(1);
Src = Src.getOperand(0);
// Peek through AND(SRL(NOT(X),Y),1).
if (isBitwiseNot(Src)) {
Src = Src.getOperand(0);
X86CC = X86CC == X86::COND_AE ? X86::COND_B : X86::COND_AE;
ContainsNOT = true;
}
// If we have BMI2 then SHRX should be faster for i32/i64 cases.
if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.getSizeInBits() >= 32))
if (SDValue BT = getBT(Src, BitNo, dl, DAG))
return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT);
}
}
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
// Attempt to recursively combine a bitmask AND with shuffles.
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
// If either operand is a constant mask, then only the elements that aren't
// zero are actually demanded by the other operand.
auto GetDemandedMasks = [&](SDValue Op) {
APInt UndefElts;
SmallVector<APInt> EltBits;
int NumElts = VT.getVectorNumElements();
int EltSizeInBits = VT.getScalarSizeInBits();
APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
APInt DemandedElts = APInt::getAllOnes(NumElts);
if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
EltBits)) {
DemandedBits.clearAllBits();
DemandedElts.clearAllBits();
for (int I = 0; I != NumElts; ++I) {
if (UndefElts[I]) {
// We can't assume an undef src element gives an undef dst - the
// other src might be zero.
DemandedBits.setAllBits();
DemandedElts.setBit(I);
} else if (!EltBits[I].isZero()) {
DemandedBits |= EltBits[I];
DemandedElts.setBit(I);
}
}
}
return std::make_pair(DemandedBits, DemandedElts);
};
APInt Bits0, Elts0;
APInt Bits1, Elts1;
std::tie(Bits0, Elts0) = GetDemandedMasks(N1);
std::tie(Bits1, Elts1) = GetDemandedMasks(N0);
if (TLI.SimplifyDemandedVectorElts(N0, Elts0, DCI) ||
TLI.SimplifyDemandedVectorElts(N1, Elts1, DCI) ||
TLI.SimplifyDemandedBits(N0, Bits0, Elts0, DCI) ||
TLI.SimplifyDemandedBits(N1, Bits1, Elts1, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
SDValue NewN0 = TLI.SimplifyMultipleUseDemandedBits(N0, Bits0, Elts0, DAG);
SDValue NewN1 = TLI.SimplifyMultipleUseDemandedBits(N1, Bits1, Elts1, DAG);
if (NewN0 || NewN1)
return DAG.getNode(ISD::AND, dl, VT, NewN0 ? NewN0 : N0,
NewN1 ? NewN1 : N1);
}
// Attempt to combine a scalar bitmask AND with an extracted shuffle.
if ((VT.getScalarSizeInBits() % 8) == 0 &&
N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(N0.getOperand(1))) {
SDValue BitMask = N1;
SDValue SrcVec = N0.getOperand(0);
EVT SrcVecVT = SrcVec.getValueType();
// Check that the constant bitmask masks whole bytes.
APInt UndefElts;
SmallVector<APInt, 64> EltBits;
if (VT == SrcVecVT.getScalarType() && N0->isOnlyUserOf(SrcVec.getNode()) &&
getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
llvm::all_of(EltBits, [](const APInt &M) {
return M.isZero() || M.isAllOnes();
})) {
unsigned NumElts = SrcVecVT.getVectorNumElements();
unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8;
unsigned Idx = N0.getConstantOperandVal(1);
// Create a root shuffle mask from the byte mask and the extracted index.
SmallVector<int, 16> ShuffleMask(NumElts * Scale, SM_SentinelUndef);
for (unsigned i = 0; i != Scale; ++i) {
if (UndefElts[i])
continue;
int VecIdx = Scale * Idx + i;
ShuffleMask[VecIdx] = EltBits[i].isZero() ? SM_SentinelZero : VecIdx;
}
if (SDValue Shuffle = combineX86ShufflesRecursively(
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
X86::MaxShuffleCombineDepth,
/*HasVarMask*/ false, /*AllowVarCrossLaneMask*/ true,
/*AllowVarPerLaneMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle,
N0.getOperand(1));
}
}
return SDValue();
}
// Canonicalize OR(AND(X,C),AND(Y,~C)) -> OR(AND(X,C),ANDNP(C,Y))
static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
MVT VT = N->getSimpleValueType(0);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if (!VT.isVector() || (EltSizeInBits % 8) != 0)
return SDValue();
SDValue N0 = peekThroughBitcasts(N->getOperand(0));
SDValue N1 = peekThroughBitcasts(N->getOperand(1));
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
return SDValue();
// On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use
// VPTERNLOG. Otherwise only do this if either mask has multiple uses already.
if (!(Subtarget.hasXOP() || useVPTERNLOG(Subtarget, VT) ||
!N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse()))
return SDValue();
// Attempt to extract constant byte masks.
APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1;
if (!getTargetConstantBitsFromNode(N0.getOperand(1), 8, UndefElts0, EltBits0,
false, false))
return SDValue();
if (!getTargetConstantBitsFromNode(N1.getOperand(1), 8, UndefElts1, EltBits1,
false, false))
return SDValue();
for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) {
// TODO - add UNDEF elts support.
if (UndefElts0[i] || UndefElts1[i])
return SDValue();
if (EltBits0[i] != ~EltBits1[i])
return SDValue();
}
SDLoc DL(N);
if (useVPTERNLOG(Subtarget, VT)) {
// Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C.
// VPTERNLOG is only available for vXi32/vXi64 element types.
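// (The immediate is a truth table indexed by (A<<2)|(B<<1)|C; 0xCA ==
// 0b11001010 yields B for the rows where A=1 and C for the rows where A=0.)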
MVT OpSVT = EltSizeInBits == 32 ? MVT::i32 : MVT::i64;
MVT OpVT =
MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits());
SDValue A = DAG.getBitcast(OpVT, N0.getOperand(1));
SDValue B = DAG.getBitcast(OpVT, N0.getOperand(0));
SDValue C = DAG.getBitcast(OpVT, N1.getOperand(0));
SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
SDValue Res = getAVX512Node(X86ISD::VPTERNLOG, DL, OpVT, {A, B, C, Imm},
DAG, Subtarget);
return DAG.getBitcast(VT, Res);
}
SDValue X = N->getOperand(0);
SDValue Y =
DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),
DAG.getBitcast(VT, N1.getOperand(0)));
return DAG.getNode(ISD::OR, DL, VT, X, Y);
}
// Try to match OR(AND(~MASK,X),AND(MASK,Y)) logic pattern.
static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
if (N->getOpcode() != ISD::OR)
return false;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Canonicalize AND to LHS.
if (N1.getOpcode() == ISD::AND)
std::swap(N0, N1);
// Attempt to match OR(AND(M,Y),ANDNP(M,X)).
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP)
return false;
Mask = N1.getOperand(0);
X = N1.getOperand(1);
// Check to see if the mask appeared in both the AND and ANDNP.
if (N0.getOperand(0) == Mask)
Y = N0.getOperand(1);
else if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
else
return false;
// TODO: Also attempt to match against AND(XOR(-1,M),Y); waiting for the
// ANDNP combine to form first lets other combines run that then prevent the
// match.
return true;
}
// Try to fold:
// (or (and (m, y), (pandn m, x)))
// into:
// (vselect m, x, y)
// As a special case, try to fold:
// (or (and (m, (sub 0, x)), (pandn m, x)))
// into:
// (sub (xor X, M), M)
static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
EVT VT = N->getValueType(0);
if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||
(VT.is256BitVector() && Subtarget.hasInt256())))
return SDValue();
SDValue X, Y, Mask;
if (!matchLogicBlend(N, X, Y, Mask))
return SDValue();
// Validate that X, Y, and Mask are bitcasts, and see through them.
Mask = peekThroughBitcasts(Mask);
X = peekThroughBitcasts(X);
Y = peekThroughBitcasts(Y);
EVT MaskVT = Mask.getValueType();
unsigned EltBits = MaskVT.getScalarSizeInBits();
// TODO: Attempt to handle floating point cases as well?
if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits)
return SDValue();
SDLoc DL(N);
// Attempt to combine to conditional negate: (sub (xor X, M), M)
if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL,
DAG, Subtarget))
return Res;
// PBLENDVB is only available on SSE 4.1.
if (!Subtarget.hasSSE41())
return SDValue();
// If we have VPTERNLOG we should prefer that since PBLENDVB is multiple uops.
if (Subtarget.hasVLX())
return SDValue();
MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
Y = DAG.getBitcast(BlendVT, Y);
Mask = DAG.getBitcast(BlendVT, Mask);
Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X);
return DAG.getBitcast(VT, Mask);
}
// Helper function for combineOrCmpEqZeroToCtlzSrl
// Transforms:
// seteq(cmp x, 0)
// into:
// srl(ctlz x), log2(bitsize(x))
// Input pattern is checked by caller.
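// For example, for i32: ctlz(0) == 32, so the shift right by log2(32) == 5
// yields 1; for any nonzero x, ctlz(x) <= 31 and the shift yields 0 - the
// same result as seteq(cmp x, 0).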
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) {
SDValue Cmp = Op.getOperand(1);
EVT VT = Cmp.getOperand(0).getValueType();
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDLoc dl(Op);
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0));
// The result of the shift is true or false, and on X86, the 32-bit
// encoding of shr and lzcnt is more desirable.
SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
DAG.getConstant(Log2b, dl, MVT::i8));
return Scc;
}
// Try to transform:
// zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0))))
// into:
// srl(or(ctlz(x), ctlz(y)), log2(bitsize(x)))
// Will also attempt to match more generic cases, e.g.:
// zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0)))
// Only applies if the target supports the FastLZCNT feature.
static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast())
return SDValue();
auto isORCandidate = [](SDValue N) {
return (N->getOpcode() == ISD::OR && N->hasOneUse());
};
// Check the zero extend is extending to 32-bit or more. The code generated by
// srl(ctlz) for 16-bit or less variants of the pattern would require extra
// instructions to clear the upper bits.
if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||
!isORCandidate(N->getOperand(0)))
return SDValue();
// Check the node matches: setcc(eq, cmp 0)
auto isSetCCCandidate = [](SDValue N) {
return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
N->getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(N->getOperand(1).getOperand(1)) &&
N->getOperand(1).getValueType().bitsGE(MVT::i32);
};
SDNode *OR = N->getOperand(0).getNode();
SDValue LHS = OR->getOperand(0);
SDValue RHS = OR->getOperand(1);
// Save nodes matching or(or, setcc(eq, cmp 0)).
SmallVector<SDNode *, 2> ORNodes;
while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) ||
(isORCandidate(RHS) && isSetCCCandidate(LHS)))) {
ORNodes.push_back(OR);
OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode();
LHS = OR->getOperand(0);
RHS = OR->getOperand(1);
}
// The last OR node should match or(setcc(eq, cmp 0), setcc(eq, cmp 0)).
if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) ||
!isORCandidate(SDValue(OR, 0)))
return SDValue();
// We have an or(setcc(eq, cmp 0), setcc(eq, cmp 0)) pattern; try to lower it
// to
// or(srl(ctlz),srl(ctlz)).
// The dag combiner can then fold it into:
// srl(or(ctlz, ctlz)).
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG);
SDValue Ret, NewRHS;
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG)))
Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS);
if (!Ret)
return SDValue();
// Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern.
while (ORNodes.size() > 0) {
OR = ORNodes.pop_back_val();
LHS = OR->getOperand(0);
RHS = OR->getOperand(1);
// Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
if (RHS->getOpcode() == ISD::OR)
std::swap(LHS, RHS);
NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG);
if (!NewRHS)
return SDValue();
Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS);
}
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
}
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
SDValue And1_L, SDValue And1_R,
const SDLoc &DL, SelectionDAG &DAG) {
if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
return SDValue();
SDValue NotOp = And0_L->getOperand(0);
if (NotOp == And1_R)
std::swap(And1_R, And1_L);
if (NotOp != And1_L)
return SDValue();
// (~(NotOp) & And0_R) | (NotOp & And1_R)
// --> ((And0_R ^ And1_R) & NotOp) ^ And0_R
EVT VT = And1_L->getValueType(0);
SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
return Xor1;
}
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
/// equivalent `((x ^ y) & m) ^ y` pattern.
/// This is typically a better representation for targets without a fused
/// "and-not" operation. This function is intended to be called from a
/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
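/// (Sanity check of the identity: when m is all ones, ((x ^ y) & m) ^ y == x;
/// when m is zero, the result is y - matching the original blended 'or'.)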
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
// Note that masked-merge variants using XOR or ADD expressions are
// normalized to OR by InstCombine so we only check for OR.
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
SDValue N0 = Node->getOperand(0);
if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
return SDValue();
SDValue N1 = Node->getOperand(1);
if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
return SDValue();
SDLoc DL(Node);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
return Result;
if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
return Result;
if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
return Result;
if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
return Result;
return SDValue();
}
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc dl(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is SSE1 only convert to FOR to avoid scalarization.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
return DAG.getBitcast(MVT::v4i32,
DAG.getNode(X86ISD::FOR, dl, MVT::v4f32,
DAG.getBitcast(MVT::v4f32, N0),
DAG.getBitcast(MVT::v4f32, N1)));
}
// Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
SmallVector<SDValue, 2> SrcOps;
SmallVector<APInt, 2> SrcPartials;
if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) &&
SrcOps.size() == 1) {
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
if (Mask) {
assert(SrcPartials[0].getBitWidth() == NumElts &&
"Unexpected partial reduction mask");
SDValue ZeroBits = DAG.getConstant(0, dl, MaskVT);
SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits);
return DAG.getSetCC(dl, MVT::i1, Mask, ZeroBits, ISD::SETNE);
}
}
}
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
if (SDValue R = combineBitOpWithShift(N, DAG))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
return R;
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
// (0 - SetCC) | C -> (zext (not SetCC)) * (C + 1) - 1 if we can get a LEA out of it.
if ((VT == MVT::i32 || VT == MVT::i64) &&
N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isNullConstant(N0.getOperand(0))) {
SDValue Cond = N0.getOperand(1);
if (Cond.getOpcode() == ISD::ZERO_EXTEND && Cond.hasOneUse())
Cond = Cond.getOperand(0);
if (Cond.getOpcode() == X86ISD::SETCC && Cond.hasOneUse()) {
if (auto *CN = dyn_cast<ConstantSDNode>(N1)) {
uint64_t Val = CN->getZExtValue();
if (Val == 1 || Val == 2 || Val == 3 || Val == 4 || Val == 7 || Val == 8) {
X86::CondCode CCode = (X86::CondCode)Cond.getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
SDValue NotCond = getSETCC(CCode, Cond.getOperand(1), SDLoc(Cond), DAG);
SDValue R = DAG.getZExtOrTrunc(NotCond, dl, VT);
R = DAG.getNode(ISD::MUL, dl, VT, R, DAG.getConstant(Val + 1, dl, VT));
R = DAG.getNode(ISD::SUB, dl, VT, R, DAG.getConstant(1, dl, VT));
return R;
}
}
}
}
// Combine OR(X,KSHIFTL(Y,Elts/2)) -> CONCAT_VECTORS(X,Y) == KUNPCK(X,Y).
// Combine OR(KSHIFTL(X,Elts/2),Y) -> CONCAT_VECTORS(Y,X) == KUNPCK(Y,X).
// iff the upper elements of the non-shifted arg are zero.
// KUNPCK requires 16+ bool vector elements.
if (N0.getOpcode() == X86ISD::KSHIFTL || N1.getOpcode() == X86ISD::KSHIFTL) {
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfElts = NumElts / 2;
APInt UpperElts = APInt::getHighBitsSet(NumElts, HalfElts);
if (NumElts >= 16 && N1.getOpcode() == X86ISD::KSHIFTL &&
N1.getConstantOperandAPInt(1) == HalfElts &&
DAG.MaskedVectorIsZero(N0, UpperElts)) {
return DAG.getNode(
ISD::CONCAT_VECTORS, dl, VT,
extractSubVector(N0, 0, DAG, dl, HalfElts),
extractSubVector(N1.getOperand(0), 0, DAG, dl, HalfElts));
}
if (NumElts >= 16 && N0.getOpcode() == X86ISD::KSHIFTL &&
N0.getConstantOperandAPInt(1) == HalfElts &&
DAG.MaskedVectorIsZero(N1, UpperElts)) {
return DAG.getNode(
ISD::CONCAT_VECTORS, dl, VT,
extractSubVector(N1, 0, DAG, dl, HalfElts),
extractSubVector(N0.getOperand(0), 0, DAG, dl, HalfElts));
}
}
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
// Attempt to recursively combine an OR of shuffles.
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
// If either operand is a constant mask, then only the elements that aren't
// all-ones are actually demanded by the other operand.
auto SimplifyUndemandedElts = [&](SDValue Op, SDValue OtherOp) {
APInt UndefElts;
SmallVector<APInt> EltBits;
int NumElts = VT.getVectorNumElements();
int EltSizeInBits = VT.getScalarSizeInBits();
if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits))
return false;
APInt DemandedElts = APInt::getZero(NumElts);
for (int I = 0; I != NumElts; ++I)
if (!EltBits[I].isAllOnes())
DemandedElts.setBit(I);
return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, DCI);
};
if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
}
// We should fold "masked merge" patterns when `andn` is not available.
if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
if (SDValue R = foldMaskedMerge(N, DAG))
return R;
return SDValue();
}
/// Try to turn tests against the signbit in the form of:
/// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
/// into:
/// SETGT(X, -1)
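/// (The shift extracts the sign bit of X, so the xor-with-1 result is 1 iff X
/// is non-negative, i.e. X > -1 - which is exactly the SETGT form below.)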
static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
// This is only worth doing if the output type is i8 or i1.
EVT ResultType = N->getValueType(0);
if (ResultType != MVT::i8 && ResultType != MVT::i1)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// We should be performing an xor against a truncated shift.
if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse())
return SDValue();
// Make sure we are performing an xor against one.
if (!isOneConstant(N1))
return SDValue();
// SetCC on x86 zero extends so only act on this if it's a logical shift.
SDValue Shift = N0.getOperand(0);
if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse())
return SDValue();
// Make sure we are truncating from one of i16, i32 or i64.
EVT ShiftTy = Shift.getValueType();
if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
return SDValue();
// Make sure the shift amount extracts the sign bit.
if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
Shift.getConstantOperandAPInt(1) != (ShiftTy.getSizeInBits() - 1))
return SDValue();
// Create a greater-than comparison against -1.
// N.B. Using SETGE against 0 works but we want a canonical-looking
// comparison; using SETGT matches up with what TranslateX86CC expects.
SDLoc DL(N);
SDValue ShiftOp = Shift.getOperand(0);
EVT ShiftOpTy = ShiftOp.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), ResultType);
SDValue Cond = DAG.getSetCC(DL, SetCCResultType, ShiftOp,
DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT);
if (SetCCResultType != ResultType)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, ResultType, Cond);
return Cond;
}
/// Turn vector tests of the signbit in the form of:
/// xor (sra X, elt_size(X)-1), -1
/// into:
/// pcmpgt X, -1
///
/// This should be called before type legalization because the pattern may not
/// persist after that.
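/// (The arithmetic shift smears the sign bit, producing all-ones for negative
/// elements and zero otherwise; xor-ing with all-ones inverts that, which is
/// the same result as 'pcmpgt X, -1'.)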
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.isSimple())
return SDValue();
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break;
}
// There must be a shift right algebraic before the xor, and the xor must be a
// 'not' operation.
SDValue Shift = N->getOperand(0);
SDValue Ones = N->getOperand(1);
if (Shift.getOpcode() != ISD::SRA || !Shift.hasOneUse() ||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
return SDValue();
// The shift should be smearing the sign bit across each vector element.
auto *ShiftAmt =
isConstOrConstSplat(Shift.getOperand(1), /*AllowUndefs*/ true);
if (!ShiftAmt ||
ShiftAmt->getAPIntValue() != (Shift.getScalarValueSizeInBits() - 1))
return SDValue();
// Create a greater-than comparison against -1. We don't use the more obvious
// greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.
return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT);
}
/// Detect patterns of truncation with unsigned saturation:
///
/// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// Return the source value x to be truncated or SDValue() if the pattern was
/// not matched.
///
/// 2. (truncate (smin (smax (x, C1), C2)) to dest_type),
/// where C1 >= 0 and C2 is unsigned max of destination type.
///
/// (truncate (smax (smin (x, C2), C1)) to dest_type)
/// where C1 >= 0, C2 is unsigned max of destination type and C1 <= C2.
///
/// These two patterns are equivalent to:
/// (truncate (umin (smax(x, C1), unsigned_max_of_dest_type)) to dest_type)
/// So return the smax(x, C1) value to be truncated or SDValue() if the
/// pattern was not matched.
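/// For example, when truncating vXi32 to vXi8, (truncate (umin x, 255)) is
/// already clamped to the unsigned i8 range, so x is returned and the caller
/// can emit an unsigned-saturating truncate.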
static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
const SDLoc &DL) {
EVT InVT = In.getValueType();
// Saturation with truncation. We truncate from InVT to VT.
assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
"Unexpected types for truncate operation");
// Match min/max and return limit value as a parameter.
auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
if (V.getOpcode() == Opcode &&
ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
return V.getOperand(0);
return SDValue();
};
APInt C1, C2;
if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2))
// C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
// the element size of the destination type.
if (C2.isMask(VT.getScalarSizeInBits()))
return UMin;
if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2))
if (MatchMinMax(SMin, ISD::SMAX, C1))
if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
return SMin;
if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1))
if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2))
if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
C2.uge(C1)) {
return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
}
return SDValue();
}
/// Detect patterns of truncation with signed saturation:
/// (truncate (smin ((smax (x, signed_min_of_dest_type)),
/// signed_max_of_dest_type)) to dest_type)
/// or:
/// (truncate (smax ((smin (x, signed_max_of_dest_type)),
/// signed_min_of_dest_type)) to dest_type).
/// With MatchPackUS, the smax/smin range is [0, unsigned_max_of_dest_type].
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
unsigned NumDstBits = VT.getScalarSizeInBits();
unsigned NumSrcBits = In.getScalarValueSizeInBits();
assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
auto MatchMinMax = [](SDValue V, unsigned Opcode,
const APInt &Limit) -> SDValue {
APInt C;
if (V.getOpcode() == Opcode &&
ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
return V.getOperand(0);
return SDValue();
};
APInt SignedMax, SignedMin;
if (MatchPackUS) {
SignedMax = APInt::getAllOnes(NumDstBits).zext(NumSrcBits);
SignedMin = APInt(NumSrcBits, 0);
} else {
SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
}
if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax))
if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin))
return SMax;
if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin))
if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax))
return SMin;
return SDValue();
}
static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2() || !VT.isVector())
return SDValue();
EVT SVT = VT.getVectorElementType();
EVT InVT = In.getValueType();
EVT InSVT = InVT.getVectorElementType();
// If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is
// split across two registers, we can use a packusdw+perm to clamp to 0-65535
// and concatenate at the same time. Then we can use a final vpmovuswb to
// clip to 0-255.
if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
InVT == MVT::v16i32 && VT == MVT::v16i8) {
if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
// Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB.
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
DL, DAG, Subtarget);
assert(Mid && "Failed to pack!");
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid);
}
}
// vXi32 truncate instructions are available with AVX512F.
// vXi16 truncate instructions are only available with AVX512BW.
// For 256-bit or smaller vectors, we require VLX.
// FIXME: We could widen truncates to 512 to remove the VLX restriction.
// If the result type is 256-bits or larger and we have disabled 512-bit
// registers, we should go ahead and use the pack instructions if possible.
bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) ||
(Subtarget.hasBWI() && InSVT == MVT::i16)) &&
(InVT.getSizeInBits() > 128) &&
(Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
!(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 &&
VT.getSizeInBits() >= 64 &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
// vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
// Only do this when the result is at least 64 bits or we'd be leaving
// dangling PACKSSDW nodes.
if (SVT == MVT::i8 && InSVT == MVT::i32) {
EVT MidVT = VT.changeVectorElementType(MVT::i16);
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
DAG, Subtarget);
assert(Mid && "Failed to pack!");
SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
Subtarget);
assert(V && "Failed to pack!");
return V;
} else if (SVT == MVT::i8 || Subtarget.hasSSE41())
return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG,
Subtarget);
}
if (SDValue SSatVal = detectSSatPattern(In, VT))
return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG,
Subtarget);
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 &&
Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI()) &&
(SVT == MVT::i32 || SVT == MVT::i16 || SVT == MVT::i8)) {
unsigned TruncOpc = 0;
SDValue SatVal;
if (SDValue SSatVal = detectSSatPattern(In, VT)) {
SatVal = SSatVal;
TruncOpc = X86ISD::VTRUNCS;
} else if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL)) {
SatVal = USatVal;
TruncOpc = X86ISD::VTRUNCUS;
}
if (SatVal) {
unsigned ResElts = VT.getVectorNumElements();
// If the input type is less than 512 bits and we don't have VLX, we need
// to widen to 512 bits.
if (!Subtarget.hasVLX() && !InVT.is512BitVector()) {
unsigned NumConcats = 512 / InVT.getSizeInBits();
ResElts *= NumConcats;
SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(InVT));
ConcatOps[0] = SatVal;
InVT = EVT::getVectorVT(*DAG.getContext(), InSVT,
NumConcats * InVT.getVectorNumElements());
SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps);
}
// Widen the result if it's narrower than 128 bits.
if (ResElts * SVT.getSizeInBits() < 128)
ResElts = 128 / SVT.getSizeInBits();
EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts);
SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
}
return SDValue();
}
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the
/// efficient ISD::AVGCEILU (AVG) instruction.
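/// (The pattern computes the rounded-up average in a wider type so the '+ 1'
/// cannot overflow; AVGCEILU computes the same rounded-up average directly on
/// the narrow element type.)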
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
if (!VT.isVector())
return SDValue();
EVT InVT = In.getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT ScalarVT = VT.getVectorElementType();
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2))
return SDValue();
// InScalarVT is the intermediate type in the AVG pattern and it should be
// wider than the original input type (i8/i16).
EVT InScalarVT = InVT.getVectorElementType();
if (InScalarVT.getFixedSizeInBits() <= ScalarVT.getFixedSizeInBits())
return SDValue();
if (!Subtarget.hasSSE2())
return SDValue();
// Detect the following pattern:
//
// %1 = zext <N x i8> %a to <N x i32>
// %2 = zext <N x i8> %b to <N x i32>
// %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
// %4 = add nuw nsw <N x i32> %3, %2
// %5 = lshr <N x i32> %4, <i32 1 x N>
// %6 = trunc <N x i32> %5 to <N x i8>
//
// In AVX512, the last instruction can also be a trunc store.
if (In.getOpcode() != ISD::SRL)
return SDValue();
// A lambda checking the given SDValue is a constant vector and each element
// is in the range [Min, Max].
auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
return ISD::matchUnaryPredicate(V, [Min, Max](ConstantSDNode *C) {
return !(C->getAPIntValue().ult(Min) || C->getAPIntValue().ugt(Max));
});
};
auto IsZExtLike = [DAG = &DAG, ScalarVT](SDValue V) {
unsigned MaxActiveBits = DAG->computeKnownBits(V).countMaxActiveBits();
return MaxActiveBits <= ScalarVT.getSizeInBits();
};
// Check if each element of the vector is right-shifted by one.
SDValue LHS = In.getOperand(0);
SDValue RHS = In.getOperand(1);
if (!IsConstVectorInRange(RHS, 1, 1))
return SDValue();
if (LHS.getOpcode() != ISD::ADD)
return SDValue();
// Detect a pattern of a + b + 1 where the order doesn't matter.
SDValue Operands[3];
Operands[0] = LHS.getOperand(0);
Operands[1] = LHS.getOperand(1);
auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(ISD::AVGCEILU, DL, Ops[0].getValueType(), Ops);
};
auto AVGSplitter = [&](std::array<SDValue, 2> Ops) {
for (SDValue &Op : Ops)
if (Op.getValueType() != VT)
Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
// Pad to a power-of-2 vector, split+apply and extract the original vector.
unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
if (NumElemsPow2 != NumElems) {
for (SDValue &Op : Ops) {
SmallVector<SDValue, 32> EltsOfOp(NumElemsPow2, DAG.getUNDEF(ScalarVT));
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Idx = DAG.getIntPtrConstant(i, DL);
EltsOfOp[i] =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op, Idx);
}
Op = DAG.getBuildVector(Pow2VT, DL, EltsOfOp);
}
}
SDValue Res = SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, Ops, AVGBuilder);
if (NumElemsPow2 == NumElems)
return Res;
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
};
// Take care of the case when one of the operands is a constant vector whose
// element is in the range [1, 256].
if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
IsZExtLike(Operands[0])) {
// The pattern is detected. Subtract one from the constant vector, then
// demote it and emit the AVG node (ISD::AVGCEILU).
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
return AVGSplitter({Operands[0], Operands[1]});
}
// Matches 'add like' patterns: either add(Op0,Op1) or zext(or(Op0,Op1)).
// Match the or case only if it's 'add-like' - i.e. it can be replaced by an add.
auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) {
if (ISD::ADD == V.getOpcode()) {
Op0 = V.getOperand(0);
Op1 = V.getOperand(1);
return true;
}
if (ISD::ZERO_EXTEND != V.getOpcode())
return false;
V = V.getOperand(0);
if (V.getValueType() != VT || ISD::OR != V.getOpcode() ||
!DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)))
return false;
Op0 = V.getOperand(0);
Op1 = V.getOperand(1);
return true;
};
SDValue Op0, Op1;
if (FindAddLike(Operands[0], Op0, Op1))
std::swap(Operands[0], Operands[1]);
else if (!FindAddLike(Operands[1], Op0, Op1))
return SDValue();
Operands[2] = Op0;
Operands[1] = Op1;
// Now we have three operands of two additions. Check that one of them is a
// constant vector with ones, and the other two can be promoted from i8/i16.
for (SDValue &Op : Operands) {
if (!IsConstVectorInRange(Op, 1, 1))
continue;
std::swap(Op, Operands[2]);
// Check if Operands[0] and Operands[1] are results of type promotion.
for (int j = 0; j < 2; ++j)
if (Operands[j].getValueType() != VT)
if (!IsZExtLike(Operands[j]))
return SDValue();
// The pattern is detected; emit the AVG instruction(s).
return AVGSplitter({Operands[0], Operands[1]});
}
return SDValue();
}
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
LoadSDNode *Ld = cast<LoadSDNode>(N);
EVT RegVT = Ld->getValueType(0);
EVT MemVT = Ld->getMemoryVT();
SDLoc dl(Ld);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// For chips with slow 32-byte unaligned loads, break the 32-byte operation
// into two 16-byte operations. Also split non-temporal aligned loads on
// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();
unsigned Fast;
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
((Ld->isNonTemporal() && !Subtarget.hasInt256() &&
Ld->getAlign() >= Align(16)) ||
(TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
*Ld->getMemOperand(), &Fast) &&
!Fast))) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
unsigned HalfOffset = 16;
SDValue Ptr1 = Ld->getBasePtr();
SDValue Ptr2 =
DAG.getMemBasePlusOffset(Ptr1, TypeSize::Fixed(HalfOffset), dl);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems / 2);
SDValue Load1 =
DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),
Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2,
Ld->getPointerInfo().getWithOffset(HalfOffset),
Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1), Load2.getValue(1));
SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
return DCI.CombineTo(N, NewVec, TF, true);
}
// Bool vector load - attempt to cast to an integer, as we have good
// (vXiY *ext(vXi1 bitcast(iX))) handling.
if (Ext == ISD::NON_EXTLOAD && !Subtarget.hasAVX512() && RegVT.isVector() &&
RegVT.getScalarType() == MVT::i1 && DCI.isBeforeLegalize()) {
unsigned NumElts = RegVT.getVectorNumElements();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
if (TLI.isTypeLegal(IntVT)) {
SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(),
Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad);
return DCI.CombineTo(N, BoolVec, IntLoad.getValue(1), true);
}
}
// If we also broadcast this as a subvector to a wider type, then just extract
// the lowest subvector.
if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&
(RegVT.is128BitVector() || RegVT.is256BitVector())) {
SDValue Ptr = Ld->getBasePtr();
SDValue Chain = Ld->getChain();
for (SDNode *User : Ptr->uses()) {
if (User != N && User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
MemVT.getSizeInBits() &&
!User->hasAnyUseOfValue(1) &&
User->getValueSizeInBits(0).getFixedValue() >
RegVT.getFixedSizeInBits()) {
SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
RegVT.getSizeInBits());
Extract = DAG.getBitcast(RegVT, Extract);
return DCI.CombineTo(N, Extract, SDValue(User, 1));
}
}
}
// Cast ptr32 and ptr64 pointers to the default address space before a load.
unsigned AddrSpace = Ld->getAddressSpace();
if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
AddrSpace == X86AS::PTR32_UPTR) {
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
if (PtrVT != Ld->getBasePtr().getSimpleValueType()) {
SDValue Cast =
DAG.getAddrSpaceCast(dl, PtrVT, Ld->getBasePtr(), AddrSpace, 0);
return DAG.getLoad(RegVT, dl, Ld->getChain(), Cast, Ld->getPointerInfo(),
Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
}
}
return SDValue();
}
/// If V is a build vector of boolean constants and exactly one of those
/// constants is true, return the operand index of that true element.
/// Otherwise, return -1.
static int getOneTrueElt(SDValue V) {
// This needs to be a build vector of booleans.
// TODO: Checking for the i1 type matches the IR definition for the mask,
// but the mask check could be loosened to i8 or other types. That might
// also require checking more than 'allOnesValue'; e.g., the x86 HW
// instructions only require that the MSB is set for each mask element.
// The ISD::MSTORE comments/definition do not specify how the mask operand
// is formatted.
auto *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1)
return -1;
int TrueIndex = -1;
unsigned NumElts = BV->getValueType(0).getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
const SDValue &Op = BV->getOperand(i);
if (Op.isUndef())
continue;
auto *ConstNode = dyn_cast<ConstantSDNode>(Op);
if (!ConstNode)
return -1;
if (ConstNode->getAPIntValue().countTrailingOnes() >= 1) {
// If we already found a one, this is too many.
if (TrueIndex >= 0)
return -1;
TrueIndex = i;
}
}
return TrueIndex;
}
/// Given a masked memory load/store operation, return true if it has one mask
/// bit set. If it has one mask bit set, then also return the memory address of
/// the scalar element to load/store, the vector index to insert/extract that
/// scalar element, and the alignment for the scalar memory access.
static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
SelectionDAG &DAG, SDValue &Addr,
SDValue &Index, Align &Alignment,
unsigned &Offset) {
int TrueMaskElt = getOneTrueElt(MaskedOp->getMask());
if (TrueMaskElt < 0)
return false;
// Get the address of the one scalar element that is specified by the mask
// using the appropriate offset from the base pointer.
EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType();
Offset = 0;
Addr = MaskedOp->getBasePtr();
if (TrueMaskElt != 0) {
Offset = TrueMaskElt * EltVT.getStoreSize();
Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset),
SDLoc(MaskedOp));
}
Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp));
Alignment = commonAlignment(MaskedOp->getOriginalAlign(),
EltVT.getStoreSize());
return true;
}
/// If exactly one element of the mask is set for a non-extending masked load,
/// reduce it to a scalar load and a vector insert.
/// Note: It is expected that the degenerate cases of an all-zeros or all-ones
/// mask have already been optimized in IR, so we don't bother with those here.
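/// A sketch of the transform (illustrative, v4i32 with mask <0,0,1,0>):
///   x = mload ptr, mask, passthru
///   -->
///   s = load (ptr + 8)
///   x = insert_vector_elt passthru, s, 2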
static SDValue
reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(ML->isUnindexed() && "Unexpected indexed masked load!");
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
SDValue Addr, VecIndex;
Align Alignment;
unsigned Offset;
if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment, Offset))
return SDValue();
// Load the one scalar element that is specified by the mask using the
// appropriate offset from the base pointer.
SDLoc DL(ML);
EVT VT = ML->getValueType(0);
EVT EltVT = VT.getVectorElementType();
EVT CastVT = VT;
if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
EltVT = MVT::f64;
CastVT = VT.changeVectorElementType(EltVT);
}
SDValue Load =
DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
ML->getPointerInfo().getWithOffset(Offset),
Alignment, ML->getMemOperand()->getFlags());
SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());
// Insert the loaded element into the appropriate place in the vector.
SDValue Insert =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, CastVT, PassThru, Load, VecIndex);
Insert = DAG.getBitcast(VT, Insert);
return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
static SDValue
combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert(ML->isUnindexed() && "Unexpected indexed masked load!");
if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()))
return SDValue();
SDLoc DL(ML);
EVT VT = ML->getValueType(0);
// If we are loading the first and last elements of a vector, it is safe and
// always faster to load the whole vector. Replace the masked load with a
// vector load and select.
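// e.g. (illustrative) for v4f32 with mask <1,0,0,1>:
//   v = load ptr ; safe - both ends of the range are dereferenceable
//   x = vselect mask, v, passthru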
unsigned NumElts = VT.getVectorNumElements();
BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask());
bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0));
bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1));
if (LoadFirstElt && LoadLastElt) {
SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMemOperand());
SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
ML->getPassThru());
return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
}
// Convert a masked load with a constant mask into a masked load and a select.
// This allows the select operation to use a faster kind of select instruction
// (for example, vblendvps -> vblendps).
// Don't try this if the pass-through operand is already undefined. That would
// cause an infinite loop because that's what we're about to create.
if (ML->getPassThru().isUndef())
return SDValue();
if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode()))
return SDValue();
// The new masked load has an undef pass-through operand. The select uses the
// original pass-through operand.
SDValue NewML = DAG.getMaskedLoad(
VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(),
DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(),
ML->getAddressingMode(), ML->getExtensionType());
SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
ML->getPassThru());
return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
auto *Mld = cast<MaskedLoadSDNode>(N);
// TODO: Expanding load with constant mask may be optimized as well.
if (Mld->isExpandingLoad())
return SDValue();
if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
if (SDValue ScalarLoad =
reduceMaskedLoadToScalarLoad(Mld, DAG, DCI, Subtarget))
return ScalarLoad;
// TODO: Do some AVX512 subsets benefit from this transform?
if (!Subtarget.hasAVX512())
if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
return Blend;
}
// If the mask value has been legalized to a non-boolean vector, try to
// simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mld->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
EVT VT = Mld->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
if (SDValue NewMask =
TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG))
return DAG.getMaskedLoad(
VT, SDLoc(N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(),
NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(),
Mld->getAddressingMode(), Mld->getExtensionType());
}
return SDValue();
}
/// If exactly one element of the mask is set for a non-truncating masked store,
/// reduce it to a vector extract and a scalar store.
/// Note: It is expected that the degenerate cases of an all-zeros or all-ones
/// mask have already been optimized in IR, so we don't bother with those here.
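/// A sketch of the transform (illustrative, v4i32 with mask <0,1,0,0>):
///   mstore v, ptr, mask
///   -->
///   s = extract_vector_elt v, 1
///   store s, (ptr + 4)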
static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
SDValue Addr, VecIndex;
Align Alignment;
unsigned Offset;
if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment, Offset))
return SDValue();
// Extract the one scalar element that is actually being stored.
SDLoc DL(MS);
SDValue Value = MS->getValue();
EVT VT = Value.getValueType();
EVT EltVT = VT.getVectorElementType();
if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
EltVT = MVT::f64;
EVT CastVT = VT.changeVectorElementType(EltVT);
Value = DAG.getBitcast(CastVT, Value);
}
SDValue Extract =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Value, VecIndex);
// Store that element at the appropriate offset from the base pointer.
return DAG.getStore(MS->getChain(), DL, Extract, Addr,
MS->getPointerInfo().getWithOffset(Offset),
Alignment, MS->getMemOperand()->getFlags());
}
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
if (Mst->isCompressingStore())
return SDValue();
EVT VT = Mst->getValue().getValueType();
SDLoc dl(Mst);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Mst->isTruncatingStore())
return SDValue();
if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
return ScalarStore;
// If the mask value has been legalized to a non-boolean vector, try to
// simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
if (SDValue NewMask =
TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG))
return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Mst->getValue(),
Mst->getBasePtr(), Mst->getOffset(), NewMask,
Mst->getMemoryVT(), Mst->getMemOperand(),
Mst->getAddressingMode());
}
SDValue Value = Mst->getValue();
if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
Mst->getMemoryVT())) {
return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
Mst->getBasePtr(), Mst->getOffset(), Mask,
Mst->getMemoryVT(), Mst->getMemOperand(),
Mst->getAddressingMode(), true);
}
return SDValue();
}
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
SDValue StoredVal = St->getValue();
EVT VT = StoredVal.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Convert a store of vXi1 into a store of iX and a bitcast.
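// e.g. (illustrative): store v8i1 %m, ptr --> store (i8 bitcast %m), ptr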
if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() &&
VT.getVectorElementType() == MVT::i1) {
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
StoredVal = DAG.getBitcast(NewVT, StoredVal);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
// If this is a store of a scalar_to_vector to v1i1, just use a scalar store.
// This will avoid a copy to k-register.
if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
StoredVal.getOperand(0).getValueType() == MVT::i8) {
SDValue Val = StoredVal.getOperand(0);
// We must store zeros to the unused bits.
Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
return DAG.getStore(St->getChain(), dl, Val,
St->getBasePtr(), St->getPointerInfo(),
St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
// Widen v2i1/v4i1 stores to v8i1.
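// e.g. (illustrative): a v2i1 store becomes a v8i1 store of
// concat_vectors(%m, zero, zero, zero) so the unused lanes are well-defined.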
if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
Subtarget.hasAVX512()) {
unsigned NumConcats = 8 / VT.getVectorNumElements();
// We must store zeros to the unused bits.
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
Ops[0] = StoredVal;
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
// Turn vXi1 stores of constants into a scalar store.
if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&
ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) {
// If it's a v64i1 store without 64-bit support, we need two stores.
if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {
SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
StoredVal->ops().slice(0, 32));
Lo = combinevXi1ConstantToInteger(Lo, DAG);
SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl,
StoredVal->ops().slice(32, 32));
Hi = combinevXi1ConstantToInteger(Hi, DAG);
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(4), dl);
SDValue Ch0 =
DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),
St->getOriginalAlign(),
St->getMemOperand()->getFlags());
SDValue Ch1 =
DAG.getStore(St->getChain(), dl, Hi, Ptr1,
St->getPointerInfo().getWithOffset(4),
St->getOriginalAlign(),
St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
// If we are saving a 32-byte vector and 32-byte stores are slow, such as on
// Sandy Bridge, perform two 16-byte stores.
unsigned Fast;
if (VT.is256BitVector() && StVT == VT &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*St->getMemOperand(), &Fast) &&
!Fast) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
return splitVectorStore(St, DAG);
}
// Split under-aligned vector non-temporal stores.
if (St->isNonTemporal() && StVT == VT &&
St->getAlign().value() < VT.getStoreSize()) {
// ZMM/YMM nt-stores - either it can be stored as a series of shorter
// vectors or the legalizer can scalarize it to use MOVNTI.
if (VT.is256BitVector() || VT.is512BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
return splitVectorStore(St, DAG);
}
// XMM nt-stores - scalarize this to f64 nt-stores on SSE4A, else i32/i64
// to use MOVNTI.
if (VT.is128BitVector() && Subtarget.hasSSE2()) {
MVT NTVT = Subtarget.hasSSE4A()
? MVT::v2f64
: (TLI.isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32);
return scalarizeVectorStore(St, NTVT, DAG);
}
}
// Try to optimize v16i16->v16i8 truncating stores when BWI is not
// supported but AVX512F is, by extending to v16i32 and truncating.
if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() &&
St->getValue().getOpcode() == ISD::TRUNCATE &&
St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
St->getValue().getOperand(0));
return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
MVT::v16i8, St->getMemOperand());
}
// Try to fold a VTRUNCUS or VTRUNCS into a truncating store.
if (!St->isTruncatingStore() &&
(StoredVal.getOpcode() == X86ISD::VTRUNCUS ||
StoredVal.getOpcode() == X86ISD::VTRUNCS) &&
StoredVal.hasOneUse() &&
TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
return EmitTruncSStore(IsSigned, St->getChain(),
dl, StoredVal.getOperand(0), St->getBasePtr(),
VT, St->getMemOperand(), DAG);
}
// Try to fold an extract_element(VTRUNC) pattern into a truncating store.
if (!St->isTruncatingStore()) {
auto IsExtractedElement = [](SDValue V) {
if (V.getOpcode() == ISD::TRUNCATE && V.hasOneUse())
V = V.getOperand(0);
unsigned Opc = V.getOpcode();
if ((Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) &&
isNullConstant(V.getOperand(1)) && V.hasOneUse() &&
V.getOperand(0).hasOneUse())
return V.getOperand(0);
return SDValue();
};
if (SDValue Extract = IsExtractedElement(StoredVal)) {
SDValue Trunc = peekThroughOneUseBitcasts(Extract);
if (Trunc.getOpcode() == X86ISD::VTRUNC) {
SDValue Src = Trunc.getOperand(0);
MVT DstVT = Trunc.getSimpleValueType();
MVT SrcVT = Src.getSimpleValueType();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned NumTruncBits = DstVT.getScalarSizeInBits() * NumSrcElts;
MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumSrcElts);
if (NumTruncBits == VT.getSizeInBits() &&
TLI.isTruncStoreLegal(SrcVT, TruncVT)) {
return DAG.getTruncStore(St->getChain(), dl, Src, St->getBasePtr(),
TruncVT, St->getMemOperand());
}
}
}
}
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
if (St->isTruncatingStore() && VT.isVector()) {
// Check if we can detect an AVG pattern from the truncation. If yes,
// replace the trunc store by a normal store with the result of an
// X86ISD::AVG instruction.
if (DCI.isBeforeLegalize() || TLI.isTypeLegal(St->getMemoryVT()))
if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
Subtarget, dl))
return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
if (TLI.isTruncStoreLegal(VT, StVT)) {
if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT()))
return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
dl, Val, St->getBasePtr(),
St->getMemoryVT(), St->getMemOperand(), DAG);
if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(),
DAG, dl))
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
dl, Val, St->getBasePtr(),
St->getMemoryVT(), St->getMemOperand(), DAG);
}
return SDValue();
}
// Cast ptr32 and ptr64 pointers to the default address space before a store.
unsigned AddrSpace = St->getAddressSpace();
if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
AddrSpace == X86AS::PTR32_UPTR) {
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
if (PtrVT != St->getBasePtr().getSimpleValueType()) {
SDValue Cast =
DAG.getAddrSpaceCast(dl, PtrVT, St->getBasePtr(), AddrSpace, 0);
return DAG.getStore(St->getChain(), dl, StoredVal, Cast,
St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags(), St->getAAInfo());
}
}
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
if (VT.getSizeInBits() != 64)
return SDValue();
const Function &F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal =
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) &&
isa<LoadSDNode>(St->getValue()) &&
cast<LoadSDNode>(St->getValue())->isSimple() &&
St->getChain().hasOneUse() && St->isSimple()) {
LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());
if (!ISD::isNormalLoad(Ld))
return SDValue();
// Avoid the transformation if there are multiple uses of the loaded value.
if (!Ld->hasNUsesOfValue(1, 0))
return SDValue();
SDLoc LdDL(Ld);
SDLoc StDL(N);
// Lower to a single movq load/store pair.
SDValue NewLd = DAG.getLoad(MVT::f64, LdDL, Ld->getChain(),
Ld->getBasePtr(), Ld->getMemOperand());
// Make sure new load is placed in same chain order.
DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
St->getMemOperand());
}
// This is similar to the above case, but here we handle a scalar 64-bit
// integer store that is extracted from a vector on a 32-bit target.
// If we have SSE2, then we can treat it like a floating-point double
// to get past legalization. The execution dependencies fixup pass will
// choose the optimal machine instruction for the store if this really is
// an integer or v2f32 rather than an f64.
if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue OldExtract = St->getOperand(1);
SDValue ExtOp0 = OldExtract.getOperand(0);
unsigned VecSize = ExtOp0.getValueSizeInBits();
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64);
SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
BitCast, OldExtract.getOperand(1));
return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
return SDValue();
}
static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
auto *St = cast<MemIntrinsicSDNode>(N);
SDValue StoredVal = N->getOperand(1);
MVT VT = StoredVal.getSimpleValueType();
EVT MemVT = St->getMemoryVT();
// Figure out which elements we demand.
unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits();
APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
return SDValue();
}
/// Return 'true' if this vector operation is "horizontal"
/// and return the operands for the horizontal operation in LHS and RHS. A
/// horizontal operation performs the binary operation on successive elements
/// of its first operand, then on successive elements of its second operand,
/// returning the resulting values in a vector. For example, if
/// A = < float a0, float a1, float a2, float a3 >
/// and
/// B = < float b0, float b1, float b2, float b3 >
/// then the result of doing a horizontal operation on A and B is
/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
SelectionDAG &DAG, const X86Subtarget &Subtarget,
bool IsCommutative,
SmallVectorImpl<int> &PostShuffleMask) {
// If either operand is undef, bail out. The binop should be simplified.
if (LHS.isUndef() || RHS.isUndef())
return false;
// Look for the following pattern:
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
// and
// LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
// RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
// then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
// which is A horizontal-op B.
MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
unsigned NumElts = VT.getVectorNumElements();
auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1,
SmallVectorImpl<int> &ShuffleMask) {
bool UseSubVector = false;
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Op.getOperand(0).getValueType().is256BitVector() &&
llvm::isNullConstant(Op.getOperand(1))) {
Op = Op.getOperand(0);
UseSubVector = true;
}
SmallVector<SDValue, 2> SrcOps;
SmallVector<int, 16> SrcMask, ScaledMask;
SDValue BC = peekThroughBitcasts(Op);
if (getTargetShuffleInputs(BC, SrcOps, SrcMask, DAG) &&
!isAnyZero(SrcMask) && all_of(SrcOps, [BC](SDValue Op) {
return Op.getValueSizeInBits() == BC.getValueSizeInBits();
})) {
resolveTargetShuffleInputsAndMask(SrcOps, SrcMask);
if (!UseSubVector && SrcOps.size() <= 2 &&
scaleShuffleElements(SrcMask, NumElts, ScaledMask)) {
N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
ShuffleMask.assign(ScaledMask.begin(), ScaledMask.end());
}
if (UseSubVector && SrcOps.size() == 1 &&
scaleShuffleElements(SrcMask, 2 * NumElts, ScaledMask)) {
std::tie(N0, N1) = DAG.SplitVector(SrcOps[0], SDLoc(Op));
ArrayRef<int> Mask = ArrayRef<int>(ScaledMask).slice(0, NumElts);
ShuffleMask.assign(Mask.begin(), Mask.end());
}
}
};
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
// If LHS is not a shuffle, then pretend it is the identity shuffle:
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
// NOTE: A default initialized SDValue represents an UNDEF of type VT.
SDValue A, B;
SmallVector<int, 16> LMask;
GetShuffle(LHS, A, B, LMask);
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
SmallVector<int, 16> RMask;
GetShuffle(RHS, C, D, RMask);
// At least one of the operands should be a vector shuffle.
unsigned NumShuffles = (LMask.empty() ? 0 : 1) + (RMask.empty() ? 0 : 1);
if (NumShuffles == 0)
return false;
if (LMask.empty()) {
A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask.push_back(i);
}
if (RMask.empty()) {
C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask.push_back(i);
}
// If we have a unary mask, ensure the other op is set to null.
if (isUndefOrInRange(LMask, 0, NumElts))
B = SDValue();
else if (isUndefOrInRange(LMask, NumElts, NumElts * 2))
A = SDValue();
if (isUndefOrInRange(RMask, 0, NumElts))
D = SDValue();
else if (isUndefOrInRange(RMask, NumElts, NumElts * 2))
C = SDValue();
// If A and B occur in reverse order in RHS, then canonicalize by commuting
// RHS operands and shuffle mask.
if (A != C) {
std::swap(C, D);
ShuffleVectorSDNode::commuteMask(RMask);
}
// Check that the shuffles are both shuffling the same vectors.
if (!(A == C && B == D))
return false;
PostShuffleMask.clear();
PostShuffleMask.append(NumElts, SM_SentinelUndef);
// LHS and RHS are now:
// LHS = shuffle A, B, LMask
// RHS = shuffle A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
// AVX defines horizontal add/sub to operate independently on 128-bit lanes,
// so we just repeat the inner loop if this is a 256-bit op.
unsigned Num128BitChunks = VT.getSizeInBits() / 128;
unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
assert((NumEltsPer128BitChunk % 2 == 0) &&
"Vector type should have an even number of elements in each lane");
for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
// Ignore undefined components.
int LIdx = LMask[i + j], RIdx = RMask[i + j];
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive odd/even elements are being operated on. If not,
// this is not a horizontal operation.
if (!((RIdx & 1) == 1 && (LIdx + 1) == RIdx) &&
!((LIdx & 1) == 1 && (RIdx + 1) == LIdx && IsCommutative))
return false;
// Compute the post-shuffle mask index based on where the element
// is stored in the HOP result, and where it needs to be moved to.
int Base = LIdx & ~1u;
int Index = ((Base % NumEltsPer128BitChunk) / 2) +
((Base % NumElts) & ~(NumEltsPer128BitChunk - 1));
// The low half of the 128-bit result must choose from A.
// The high half of the 128-bit result must choose from B,
// unless B is undef. In that case, we are always choosing from A.
if ((B && Base >= (int)NumElts) || (!B && i >= NumEltsPer64BitChunk))
Index += NumEltsPer64BitChunk;
PostShuffleMask[i + j] = Index;
}
}
SDValue NewLHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
SDValue NewRHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
bool IsIdentityPostShuffle =
isSequentialOrUndefInRange(PostShuffleMask, 0, NumElts, 0);
if (IsIdentityPostShuffle)
PostShuffleMask.clear();
// Avoid 128-bit multi-lane shuffles if pre-AVX2 and FP (integer will split).
if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&
isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask))
return false;
// If the source nodes are already used in HorizOps then always accept this.
// Shuffle folding should merge these back together.
bool FoundHorizLHS = llvm::any_of(NewLHS->uses(), [&](SDNode *User) {
return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
});
bool FoundHorizRHS = llvm::any_of(NewRHS->uses(), [&](SDNode *User) {
return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
});
bool ForceHorizOp = FoundHorizLHS && FoundHorizRHS;
// Assume a SingleSource HOP if we only shuffle one input and don't need to
// shuffle the result.
if (!ForceHorizOp &&
!shouldUseHorizontalOp(NewLHS == NewRHS &&
(NumShuffles < 2 || !IsIdentityPostShuffle),
DAG, Subtarget))
return false;
LHS = DAG.getBitcast(VT, NewLHS);
RHS = DAG.getBitcast(VT, NewRHS);
return true;
}
// Try to synthesize horizontal (f)hadd/hsub from (f)adds/subs of shuffles.
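// e.g. (illustrative, SSE3 v4f32):
//   fadd (shuffle A, B, <0,2,4,6>), (shuffle A, B, <1,3,5,7>)
//   --> X86ISD::FHADD A, B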
static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
bool IsAdd = (Opcode == ISD::FADD) || (Opcode == ISD::ADD);
SmallVector<int, 8> PostShuffleMask;
switch (Opcode) {
case ISD::FADD:
case ISD::FSUB:
if ((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
auto HorizOpcode = IsAdd ? X86ISD::FHADD : X86ISD::FHSUB;
if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
PostShuffleMask)) {
SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
if (!PostShuffleMask.empty())
HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
DAG.getUNDEF(VT), PostShuffleMask);
return HorizBinOp;
}
}
break;
case ISD::ADD:
case ISD::SUB:
if (Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v16i16 || VT == MVT::v8i32)) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
auto HorizOpcode = IsAdd ? X86ISD::HADD : X86ISD::HSUB;
if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
PostShuffleMask)) {
auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops);
};
SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
{LHS, RHS}, HOpBuilder);
if (!PostShuffleMask.empty())
HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
DAG.getUNDEF(VT), PostShuffleMask);
return HorizBinOp;
}
}
break;
}
return SDValue();
}
// Try to combine the following nodes
// t29: i64 = X86ISD::Wrapper TargetConstantPool:i64
// <i32 -2147483648[float -0.000000e+00]> 0
// t27: v16i32[v16f32],ch = X86ISD::VBROADCAST_LOAD
// <(load 4 from constant-pool)> t0, t29
// [t30: v16i32 = bitcast t27]
// t6: v16i32 = xor t7, t27[t30]
// t11: v16f32 = bitcast t6
// t21: v16f32 = X86ISD::VFMULC[X86ISD::VCFMULC] t11, t8
// into X86ISD::VFCMULC[X86ISD::VFMULC] if possible:
// t22: v16f32 = bitcast t7
// t23: v16f32 = X86ISD::VFCMULC[X86ISD::VFMULC] t8, t22
// t24: v32f16 = bitcast t23
static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
int CombineOpcode =
N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC;
auto isConjugationConstant = [](const Constant *c) {
if (const auto *CI = dyn_cast<ConstantInt>(c)) {
APInt ConjugationInt32 = APInt(32, 0x80000000, true);
APInt ConjugationInt64 = APInt(64, 0x8000000080000000ULL, true);
switch (CI->getBitWidth()) {
case 16:
return false;
case 32:
return CI->getValue() == ConjugationInt32;
case 64:
return CI->getValue() == ConjugationInt64;
default:
llvm_unreachable("Unexpected bit width");
}
}
if (const auto *CF = dyn_cast<ConstantFP>(c))
return CF->isNegativeZeroValue();
return false;
};
auto combineConjugation = [&](SDValue &r) {
if (LHS->getOpcode() == ISD::BITCAST && RHS.hasOneUse()) {
SDValue XOR = LHS.getOperand(0);
if (XOR->getOpcode() == ISD::XOR && XOR.hasOneUse()) {
SDValue XORRHS = XOR.getOperand(1);
if (XORRHS.getOpcode() == ISD::BITCAST && XORRHS.hasOneUse())
XORRHS = XORRHS.getOperand(0);
if (XORRHS.getOpcode() == X86ISD::VBROADCAST_LOAD &&
XORRHS.getOperand(1).getNumOperands()) {
ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(XORRHS.getOperand(1).getOperand(0));
if (CP && isConjugationConstant(CP->getConstVal())) {
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));
SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);
r = DAG.getBitcast(VT, FCMulC);
return true;
}
}
}
}
return false;
};
SDValue Res;
if (combineConjugation(Res))
return Res;
std::swap(LHS, RHS);
if (combineConjugation(Res))
return Res;
return Res;
}
// Try to combine the following nodes:
// FADD(A, FMA(B, C, 0)) and FADD(A, FMUL(B, C)) to FMA(B, C, A)
static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
auto AllowContract = [&DAG](const SDNodeFlags &Flags) {
return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
Flags.hasAllowContract();
};
auto HasNoSignedZero = [&DAG](const SDNodeFlags &Flags) {
return DAG.getTarget().Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros();
};
auto IsVectorAllNegativeZero = [](const SDNode *N) {
if (N->getOpcode() != X86ISD::VBROADCAST_LOAD)
return false;
assert(N->getSimpleValueType(0).getScalarType() == MVT::f32 &&
"Unexpected vector type!");
if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N->getOperand(1)->getOperand(0))) {
APInt AI = APInt(32, 0x80008000, true);
if (const auto *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
return CI->getValue() == AI;
if (const auto *CF = dyn_cast<ConstantFP>(CP->getConstVal()))
return CF->getValue() == APFloat(APFloat::IEEEsingle(), AI);
}
return false;
};
if (N->getOpcode() != ISD::FADD || !Subtarget.hasFP16() ||
!AllowContract(N->getFlags()))
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
bool IsConj;
SDValue FAddOp1, MulOp0, MulOp1;
auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &AllowContract,
&IsVectorAllNegativeZero,
&HasNoSignedZero](SDValue N) -> bool {
if (!N.hasOneUse() || N.getOpcode() != ISD::BITCAST)
return false;
SDValue Op0 = N.getOperand(0);
unsigned Opcode = Op0.getOpcode();
if (Op0.hasOneUse() && AllowContract(Op0->getFlags())) {
if ((Opcode == X86ISD::VFMULC || Opcode == X86ISD::VFCMULC)) {
MulOp0 = Op0.getOperand(0);
MulOp1 = Op0.getOperand(1);
IsConj = Opcode == X86ISD::VFCMULC;
return true;
}
if ((Opcode == X86ISD::VFMADDC || Opcode == X86ISD::VFCMADDC) &&
((ISD::isBuildVectorAllZeros(Op0->getOperand(2).getNode()) &&
HasNoSignedZero(Op0->getFlags())) ||
IsVectorAllNegativeZero(Op0->getOperand(2).getNode()))) {
MulOp0 = Op0.getOperand(0);
MulOp1 = Op0.getOperand(1);
IsConj = Opcode == X86ISD::VFCMADDC;
return true;
}
}
return false;
};
if (GetCFmulFrom(LHS))
FAddOp1 = RHS;
else if (GetCFmulFrom(RHS))
FAddOp1 = LHS;
else
return SDValue();
MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2);
FAddOp1 = DAG.getBitcast(CVT, FAddOp1);
unsigned NewOp = IsConj ? X86ISD::VFCMADDC : X86ISD::VFMADDC;
// FIXME: How do we handle when fast math flags of FADD are different from
// CFMUL's?
SDValue CFmul =
DAG.getNode(NewOp, SDLoc(N), CVT, MulOp0, MulOp1, FAddOp1, N->getFlags());
return DAG.getBitcast(VT, CFmul);
}
/// Do target-specific dag combines on floating-point adds/subs.
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget))
return HOp;
if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget))
return COp;
return SDValue();
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
/// TODO: This overlaps with the generic combiner's visitTRUNCATE. Remove
/// anything that is guaranteed to be transformed by DAGCombiner.
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
SDValue Src = N->getOperand(0);
unsigned SrcOpcode = Src.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
auto IsFreeTruncation = [VT](SDValue Op) {
unsigned TruncSizeInBits = VT.getScalarSizeInBits();
// See if this has been extended from a smaller/equal size to
// the truncation size, allowing a truncation to combine with the extend.
unsigned Opcode = Op.getOpcode();
if ((Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND ||
Opcode == ISD::ZERO_EXTEND) &&
Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
return true;
// See if this is a single use constant which can be constant folded.
// NOTE: We don't peek through bitcasts here because there is currently
// no support for constant folding truncate+bitcast+vector_of_constants. So
// we'll just end up with a truncate on both operands which will
// get turned back into (truncate (binop)) causing an infinite loop.
return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1);
};
// Don't combine if the operation has other uses.
if (!Src.hasOneUse())
return SDValue();
// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();
// In most cases it's only worth pre-truncating if we're only facing the cost
// of one truncation.
// i.e. if one of the inputs will constant fold or the input is repeated.
switch (SrcOpcode) {
case ISD::MUL:
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
// better to truncate if we have the chance.
if (SrcVT.getScalarType() == MVT::i64 &&
TLI.isOperationLegal(SrcOpcode, VT) &&
!TLI.isOperationLegal(SrcOpcode, SrcVT))
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
[[fallthrough]];
case ISD::AND:
case ISD::XOR:
case ISD::OR:
case ISD::ADD:
case ISD::SUB: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(SrcOpcode, VT) &&
(Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
return TruncateArithmetic(Op0, Op1);
break;
}
}
return SDValue();
}
/// Truncate using ISD::AND mask and X86ISD::PACKUS.
/// e.g. trunc <8 x i32> X to <8 x i16> -->
/// MaskX = X & 0xffff (clear high bits to prevent saturation)
/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
EVT OutVT = N->getValueType(0);
APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
OutVT.getScalarSizeInBits());
In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget);
}
/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
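/// A sketch (illustrative): the i32 elements are first sign-extended
/// in-register from i16 so that PACKSS's signed saturation cannot change any
/// values:
///   In = sign_extend_inreg In, vXi16
///   Out = packss (lo half of In), (hi half of In)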
static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
EVT OutVT = N->getValueType(0);
In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, InVT, In,
DAG.getValueType(OutVT));
return truncateVectorWithPACK(X86ISD::PACKSS, OutVT, In, DL, DAG, Subtarget);
}
/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into
/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
/// legalization the truncation will be translated into a BUILD_VECTOR whose
/// elements are each extracted from a vector and then truncated, making it
/// difficult to perform this optimization on that form.
static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OutVT = N->getValueType(0);
if (!OutVT.isVector())
return SDValue();
SDValue In = N->getOperand(0);
if (!In.getValueType().isSimple())
return SDValue();
EVT InVT = In.getValueType();
unsigned NumElems = OutVT.getVectorNumElements();
// AVX512 provides fast truncate ops.
if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
return SDValue();
EVT OutSVT = OutVT.getVectorElementType();
EVT InSVT = InVT.getVectorElementType();
if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
(OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
NumElems >= 8))
return SDValue();
// SSSE3's pshufb results in fewer instructions in the cases below.
if (Subtarget.hasSSSE3() && NumElems == 8) {
if (InSVT == MVT::i16)
return SDValue();
if (InSVT == MVT::i32 &&
(OutSVT == MVT::i8 || !Subtarget.hasSSE41() || Subtarget.hasInt256()))
return SDValue();
}
SDLoc DL(N);
// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
// truncate 2 x v4i32 to v8i16.
if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG);
if (InSVT == MVT::i32)
return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG);
return SDValue();
}
/// This function transforms the vector truncation of 'extended sign-bit' or
/// 'extended zero-bit' values from vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into
/// X86ISD::PACKSS/PACKUS operations.
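/// For example (illustrative): a v8i32 comparison result (every lane is all
/// sign bits) truncated to v8i16 can use a single PACKSS, while a zext_in_reg
/// value with enough leading zeros can use PACKUS instead.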
static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Requires SSE2.
if (!Subtarget.hasSSE2())
return SDValue();
if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
return SDValue();
SDValue In = N->getOperand(0);
if (!In.getValueType().isSimple())
return SDValue();
MVT VT = N->getValueType(0).getSimpleVT();
MVT SVT = VT.getScalarType();
MVT InVT = In.getValueType().getSimpleVT();
MVT InSVT = InVT.getScalarType();
// Check we have a truncation suited for PACKSS/PACKUS.
if (!isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
return SDValue();
if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
return SDValue();
// Truncation to sub-128-bit vXi32 can be better handled with shuffles.
if (SVT == MVT::i32 && VT.getSizeInBits() < 128)
return SDValue();
// AVX512 has fast truncate, but if the input is already going to be split,
// there's no harm in trying pack.
if (Subtarget.hasAVX512() &&
!(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
InVT.is512BitVector())) {
// PACK should still be worth it for 128-bit vectors if the sources were
// originally concatenated from subvectors.
SmallVector<SDValue> ConcatOps;
if (VT.getSizeInBits() > 128 ||
!collectConcatOps(In.getNode(), ConcatOps, DAG))
return SDValue();
}
unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
// Use PACKUS if the input has zero-bits that extend all the way to the
// packed/truncated value. e.g. masks, zext_in_reg, etc.
KnownBits Known = DAG.computeKnownBits(In);
unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))
return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);
// Use PACKSS if the input has sign-bits that extend all the way to the
// packed/truncated value. e.g. Comparison result, sext_in_reg, etc.
unsigned NumSignBits = DAG.ComputeNumSignBits(In);
// Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with
// a sign splat. ComputeNumSignBits struggles to see through BITCASTs later
// on and combines/simplifications can't then use it.
if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
return SDValue();
unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
if (NumSignBits > MinSignBits)
return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
// If we have a srl that only generates signbits that we will discard in
// the truncation then we can use PACKSS by converting the srl to a sra.
// SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
In, APInt::getAllOnes(VT.getVectorNumElements()))) {
if (*ShAmt == MinSignBits) {
SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
Subtarget);
}
}
return SDValue();
}
// Try to form a MULHU or MULHS node by looking for
// (trunc (srl (mul ext, ext), 16))
// TODO: This is X86 specific because we want to be able to handle wide types
// before type legalization. But we can only do it if the vector will be
// legalized via widening/splitting. Type legalization can't handle promotion
// of a MULHU/MULHS. There isn't a way to convey this to the generic DAG
// combiner.
static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
// First instruction should be a right shift of a multiply.
if (Src.getOpcode() != ISD::SRL ||
Src.getOperand(0).getOpcode() != ISD::MUL)
return SDValue();
if (!Subtarget.hasSSE2())
return SDValue();
// Only handle vXi16 types that are at least 128 bits unless they will be
// widened.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i16)
return SDValue();
// Input type should be at least vXi32.
EVT InVT = Src.getValueType();
if (InVT.getVectorElementType().getSizeInBits() < 32)
return SDValue();
// Need a shift by 16.
APInt ShiftAmt;
if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) ||
ShiftAmt != 16)
return SDValue();
SDValue LHS = Src.getOperand(0).getOperand(0);
SDValue RHS = Src.getOperand(0).getOperand(1);
// Count leading sign/zero bits on both inputs - if there are enough then
// truncation back to vXi16 will be cheap - either as a pack/shuffle
// sequence or using AVX512 truncations. If the inputs are sext/zext then the
// truncations may actually be free by peeking through to the ext source.
auto IsSext = [&DAG](SDValue V) {
return DAG.ComputeMaxSignificantBits(V) <= 16;
};
auto IsZext = [&DAG](SDValue V) {
return DAG.computeKnownBits(V).countMaxActiveBits() <= 16;
};
bool IsSigned = IsSext(LHS) && IsSext(RHS);
bool IsUnsigned = IsZext(LHS) && IsZext(RHS);
if (!IsSigned && !IsUnsigned)
return SDValue();
// Check if both inputs are extensions, which will be removed by truncation.
bool IsTruncateFree = (LHS.getOpcode() == ISD::SIGN_EXTEND ||
LHS.getOpcode() == ISD::ZERO_EXTEND) &&
(RHS.getOpcode() == ISD::SIGN_EXTEND ||
RHS.getOpcode() == ISD::ZERO_EXTEND) &&
LHS.getOperand(0).getScalarValueSizeInBits() <= 16 &&
RHS.getOperand(0).getScalarValueSizeInBits() <= 16;
// For AVX2+ targets, with the upper bits known zero, we can perform MULHU on
// the (bitcasted) inputs directly, and then cheaply pack/truncate the result
// (upper elts will be zero). Don't attempt this with just AVX512F as MULHU
// will have to split anyway.
unsigned InSizeInBits = InVT.getSizeInBits();
if (IsUnsigned && !IsTruncateFree && Subtarget.hasInt256() &&
!(Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.is256BitVector()) &&
(InSizeInBits % 16) == 0) {
EVT BCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
InVT.getSizeInBits() / 16);
SDValue Res = DAG.getNode(ISD::MULHU, DL, BCVT, DAG.getBitcast(BCVT, LHS),
DAG.getBitcast(BCVT, RHS));
return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res));
}
// Truncate back to source type.
LHS = DAG.getNode(ISD::TRUNCATE, DL, VT, LHS);
RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS);
unsigned Opc = IsSigned ? ISD::MULHS : ISD::MULHU;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
// Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes
// from one vector with signed bytes from another vector, adds together
// adjacent pairs of 16-bit products, and saturates the result before
// truncating to 16-bits.
//
// Which looks something like this:
// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))),
// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B))))))))
static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
if (!VT.isVector() || !Subtarget.hasSSSE3())
return SDValue();
unsigned NumElems = VT.getVectorNumElements();
EVT ScalarVT = VT.getVectorElementType();
if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems))
return SDValue();
SDValue SSatVal = detectSSatPattern(In, VT);
if (!SSatVal || SSatVal.getOpcode() != ISD::ADD)
return SDValue();
// Ok this is a signed saturation of an ADD. See if this ADD is adding pairs
// of multiplies from even/odd elements.
SDValue N0 = SSatVal.getOperand(0);
SDValue N1 = SSatVal.getOperand(1);
if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
// TODO: Handle constant vectors and use knownbits/computenumsignbits?
// Canonicalize zero_extend to LHS.
if (N01.getOpcode() == ISD::ZERO_EXTEND)
std::swap(N00, N01);
if (N11.getOpcode() == ISD::ZERO_EXTEND)
std::swap(N10, N11);
// Ensure we have a zero_extend and a sign_extend.
if (N00.getOpcode() != ISD::ZERO_EXTEND ||
N01.getOpcode() != ISD::SIGN_EXTEND ||
N10.getOpcode() != ISD::ZERO_EXTEND ||
N11.getOpcode() != ISD::SIGN_EXTEND)
return SDValue();
// Peek through the extends.
N00 = N00.getOperand(0);
N01 = N01.getOperand(0);
N10 = N10.getOperand(0);
N11 = N11.getOperand(0);
// Ensure the extend is from vXi8.
if (N00.getValueType().getVectorElementType() != MVT::i8 ||
N01.getValueType().getVectorElementType() != MVT::i8 ||
N10.getValueType().getVectorElementType() != MVT::i8 ||
N11.getValueType().getVectorElementType() != MVT::i8)
return SDValue();
// All inputs should be build_vectors.
if (N00.getOpcode() != ISD::BUILD_VECTOR ||
N01.getOpcode() != ISD::BUILD_VECTOR ||
N10.getOpcode() != ISD::BUILD_VECTOR ||
N11.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// N00/N10 are zero extended. N01/N11 are sign extended.
// For each output element we need the even element from one vector multiplied
// by the even element of the other, added to the product of the corresponding
// odd elements. That is, for each element i the operation performed must be:
// A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1]
SDValue ZExtIn, SExtIn;
for (unsigned i = 0; i != NumElems; ++i) {
SDValue N00Elt = N00.getOperand(i);
SDValue N01Elt = N01.getOperand(i);
SDValue N10Elt = N10.getOperand(i);
SDValue N11Elt = N11.getOperand(i);
// TODO: Be more tolerant to undefs.
if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
return SDValue();
unsigned IdxN00 = ConstN00Elt->getZExtValue();
unsigned IdxN01 = ConstN01Elt->getZExtValue();
unsigned IdxN10 = ConstN10Elt->getZExtValue();
unsigned IdxN11 = ConstN11Elt->getZExtValue();
// Add is commutative so indices can be reordered.
if (IdxN00 > IdxN10) {
std::swap(IdxN00, IdxN10);
std::swap(IdxN01, IdxN11);
}
// N0 indices must be the even element. N1 indices must be the next odd element.
if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
return SDValue();
SDValue N00In = N00Elt.getOperand(0);
SDValue N01In = N01Elt.getOperand(0);
SDValue N10In = N10Elt.getOperand(0);
SDValue N11In = N11Elt.getOperand(0);
// First time we find an input capture it.
if (!ZExtIn) {
ZExtIn = N00In;
SExtIn = N01In;
}
if (ZExtIn != N00In || SExtIn != N01In ||
ZExtIn != N10In || SExtIn != N11In)
return SDValue();
}
auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
// Shrink by adding truncate nodes and let DAGCombine fold with the
// sources.
EVT InVT = Ops[0].getValueType();
assert(InVT.getScalarType() == MVT::i8 &&
"Unexpected scalar element type");
assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
InVT.getVectorNumElements() / 2);
return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
};
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
PMADDBuilder);
}
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
SDLoc DL(N);
// Attempt to pre-truncate inputs to arithmetic ops instead.
if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
return V;
// Try to detect AVG pattern first.
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
// Try to detect PMADD
if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
return PMAdd;
// Try to combine truncation with signed/unsigned saturation.
if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
return Val;
// Try to combine PMULHUW/PMULHW for vXi16.
if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))
return V;
// Detect a truncation to i32 of a bitcast from x86mmx - the bitcast source
// is a direct MMX result.
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
SDValue BCSrc = Src.getOperand(0);
if (BCSrc.getValueType() == MVT::x86mmx)
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
}
// Try to truncate extended sign/zero bits with PACKSS/PACKUS.
if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
return V;
return combineVectorTruncation(N, DAG, Subtarget);
}
static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
SDLoc DL(N);
if (SDValue SSatVal = detectSSatPattern(In, VT))
return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL))
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
}
/// Returns the negated value if the node \p N flips the sign of an FP value.
///
/// An FP-negation node may have different forms: FNEG(x), FXOR(x, 0x80000000)
/// or FSUB(0, x).
/// AVX512F does not have FXOR, so FNEG is lowered as
/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
/// In this case we go through all bitcasts.
/// This also recognizes splat of a negated value and returns the splat of that
/// value.
static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
// Don't recurse exponentially.
if (Depth > SelectionDAG::MaxRecursionDepth)
return SDValue();
unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
EVT VT = Op->getValueType(0);
// Make sure the element size doesn't change.
if (VT.getScalarSizeInBits() != ScalarSize)
return SDValue();
unsigned Opc = Op.getOpcode();
switch (Opc) {
case ISD::VECTOR_SHUFFLE: {
// For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
// of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
if (!Op.getOperand(1).isUndef())
return SDValue();
if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1))
if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT),
cast<ShuffleVectorSDNode>(Op)->getMask());
break;
}
case ISD::INSERT_VECTOR_ELT: {
// Negate of INSERT_VECTOR_ELT(UNDEF, V, INDEX) is INSERT_VECTOR_ELT(UNDEF,
// -V, INDEX).
SDValue InsVector = Op.getOperand(0);
SDValue InsVal = Op.getOperand(1);
if (!InsVector.isUndef())
return SDValue();
if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1))
if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
NegInsVal, Op.getOperand(2));
break;
}
case ISD::FSUB:
case ISD::XOR:
case X86ISD::FXOR: {
SDValue Op1 = Op.getOperand(1);
SDValue Op0 = Op.getOperand(0);
// For XOR and FXOR, we want to check if constant
// bits of Op1 are sign bit masks. For FSUB, we
// have to check if constant bits of Op0 are sign
// bit masks and hence we swap the operands.
if (Opc == ISD::FSUB)
std::swap(Op0, Op1);
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
// Extract constant bits and see if they are all
// sign bit masks. Ignore the undef elements.
if (getTargetConstantBitsFromNode(Op1, ScalarSize, UndefElts, EltBits,
/* AllowWholeUndefs */ true,
/* AllowPartialUndefs */ false)) {
for (unsigned I = 0, E = EltBits.size(); I < E; I++)
if (!UndefElts[I] && !EltBits[I].isSignMask())
return SDValue();
// Only allow bitcast from correctly-sized constant.
Op0 = peekThroughBitcasts(Op0);
if (Op0.getScalarValueSizeInBits() == ScalarSize)
return Op0;
}
break;
} // case
} // switch
return SDValue();
}
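/// Given an FMA-family opcode, return the equivalent opcode after negating the
/// multiplication result (NegMul), the accumulator operand (NegAcc) and/or the
/// final result (NegRes).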
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
bool NegRes) {
if (NegMul) {
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode");
case ISD::FMA: Opcode = X86ISD::FNMADD; break;
case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FNMADD; break;
case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break;
case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break;
case X86ISD::STRICT_FMSUB: Opcode = X86ISD::STRICT_FNMSUB; break;
case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break;
case X86ISD::FNMADD: Opcode = ISD::FMA; break;
case X86ISD::STRICT_FNMADD: Opcode = ISD::STRICT_FMA; break;
case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break;
case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FMSUB; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
}
}
if (NegAcc) {
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode");
case ISD::FMA: Opcode = X86ISD::FMSUB; break;
case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FMSUB; break;
case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
case X86ISD::FMSUB: Opcode = ISD::FMA; break;
case X86ISD::STRICT_FMSUB: Opcode = ISD::STRICT_FMA; break;
case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break;
case X86ISD::STRICT_FNMADD: Opcode = X86ISD::STRICT_FNMSUB; break;
case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break;
case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FNMADD; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
case X86ISD::FMADDSUB: Opcode = X86ISD::FMSUBADD; break;
case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break;
case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break;
case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break;
}
}
if (NegRes) {
switch (Opcode) {
// For accuracy reasons, we never combine fneg and fma under strict FP.
default: llvm_unreachable("Unexpected opcode");
case ISD::FMA: Opcode = X86ISD::FNMSUB; break;
case X86ISD::FMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
case X86ISD::FMSUB: Opcode = X86ISD::FNMADD; break;
case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
case X86ISD::FNMADD: Opcode = X86ISD::FMSUB; break;
case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
case X86ISD::FNMSUB: Opcode = ISD::FMA; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
}
}
return Opcode;
}
/// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT OrigVT = N->getValueType(0);
SDValue Arg = isFNEG(DAG, N);
if (!Arg)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = Arg.getValueType();
EVT SVT = VT.getScalarType();
SDLoc DL(N);
// Let legalize expand this if it isn't a legal type yet.
if (!TLI.isTypeLegal(VT))
return SDValue();
// If we're negating a FMUL node on a target with FMA, then we can avoid the
// use of a constant by performing (-0 - A*B) instead.
// FIXME: Check rounding control flags as well once it becomes available.
if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
Arg.getOperand(1), Zero);
return DAG.getBitcast(OrigVT, NewNode);
}
bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
bool LegalOperations = !DCI.isBeforeLegalizeOps();
if (SDValue NegArg =
TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize))
return DAG.getBitcast(OrigVT, NegArg);
return SDValue();
}
SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations,
bool ForCodeSize,
NegatibleCost &Cost,
unsigned Depth) const {
// fneg patterns are removable even if they have multiple uses.
if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) {
Cost = NegatibleCost::Cheaper;
return DAG.getBitcast(Op.getValueType(), Arg);
}
EVT VT = Op.getValueType();
EVT SVT = VT.getScalarType();
unsigned Opc = Op.getOpcode();
SDNodeFlags Flags = Op.getNode()->getFlags();
switch (Opc) {
case ISD::FMA:
case X86ISD::FMSUB:
case X86ISD::FNMADD:
case X86ISD::FNMSUB:
case X86ISD::FMADD_RND:
case X86ISD::FMSUB_RND:
case X86ISD::FNMADD_RND:
case X86ISD::FNMSUB_RND: {
if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
!(SVT == MVT::f32 || SVT == MVT::f64) ||
!isOperationLegal(ISD::FMA, VT))
break;
// Don't fold (fneg (fma (fneg x), y, (fneg z))) to (fma x, y, z)
// if it may have signed zeros.
if (!Flags.hasNoSignedZeros())
break;
// This is always negatible for free but we might be able to remove some
// extra operand negations as well.
SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
for (int i = 0; i != 3; ++i)
NewOps[i] = getCheaperNegatedExpression(
Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1);
bool NegA = !!NewOps[0];
bool NegB = !!NewOps[1];
bool NegC = !!NewOps[2];
unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true);
Cost = (NegA || NegB || NegC) ? NegatibleCost::Cheaper
: NegatibleCost::Neutral;
// Fill in the non-negated ops with the original values.
for (int i = 0, e = Op.getNumOperands(); i != e; ++i)
if (!NewOps[i])
NewOps[i] = Op.getOperand(i);
return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps);
}
case X86ISD::FRCP:
if (SDValue NegOp0 =
getNegatedExpression(Op.getOperand(0), DAG, LegalOperations,
ForCodeSize, Cost, Depth + 1))
return DAG.getNode(Opc, SDLoc(Op), VT, NegOp0);
break;
}
return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
ForCodeSize, Cost, Depth);
}
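/// Lower an X86 FP logic op (FAND/FANDN/FOR/FXOR) on vectors to the
/// equivalent integer logic op on bitcasts of the operands.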
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
// If we have integer vector types available, use the integer opcodes.
if (!VT.isVector() || !Subtarget.hasSSE2())
return SDValue();
SDLoc dl(N);
unsigned IntBits = VT.getScalarSizeInBits();
MVT IntSVT = MVT::getIntegerVT(IntBits);
MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits);
SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
unsigned IntOpcode;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected FP logic op");
case X86ISD::FOR: IntOpcode = ISD::OR; break;
case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
case X86ISD::FAND: IntOpcode = ISD::AND; break;
case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
}
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
return DAG.getBitcast(VT, IntOp);
}
/// Fold xor(setcc(cond, val), 1) --> setcc(inverted(cond), val)
static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() != ISD::XOR)
return SDValue();
SDValue LHS = N->getOperand(0);
if (!isOneConstant(N->getOperand(1)) || LHS->getOpcode() != X86ISD::SETCC)
return SDValue();
X86::CondCode NewCC = X86::GetOppositeBranchCondition(
X86::CondCode(LHS->getConstantOperandVal(0)));
SDLoc DL(N);
return getSETCC(NewCC, LHS->getOperand(1), DL, DAG);
}
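/// Do target-specific dag combines on ISD::XOR nodes.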
static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// If this is SSE1 only convert to FXOR to avoid scalarization.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
return DAG.getBitcast(MVT::v4i32,
DAG.getNode(X86ISD::FXOR, SDLoc(N), MVT::v4f32,
DAG.getBitcast(MVT::v4f32, N0),
DAG.getBitcast(MVT::v4f32, N1)));
}
if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
return Cmp;
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
if (SDValue R = combineBitOpWithShift(N, DAG))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue SetCC = foldXor1SetCC(N, DAG))
return SetCC;
if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
return RV;
// Fold not(iX bitcast(vXi1)) -> (iX bitcast(not(vec))) for legal boolvecs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (llvm::isAllOnesConstant(N1) && N0.getOpcode() == ISD::BITCAST &&
N0.getOperand(0).getValueType().isVector() &&
N0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
TLI.isTypeLegal(N0.getOperand(0).getValueType()) && N0.hasOneUse()) {
return DAG.getBitcast(VT, DAG.getNOT(SDLoc(N), N0.getOperand(0),
N0.getOperand(0).getValueType()));
}
// Handle AVX512 mask widening.
// Fold not(insert_subvector(undef,sub)) -> insert_subvector(undef,not(sub))
if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(0).isUndef() &&
TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
return DAG.getNode(
ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
DAG.getNOT(SDLoc(N), N0.getOperand(1), N0.getOperand(1).getValueType()),
N0.getOperand(2));
}
// Fold xor(zext(xor(x,c1)),c2) -> xor(zext(x),xor(zext(c1),c2))
// Fold xor(truncate(xor(x,c1)),c2) -> xor(truncate(x),xor(truncate(c1),c2))
// TODO: Under what circumstances could this be performed in DAGCombine?
if ((N0.getOpcode() == ISD::TRUNCATE || N0.getOpcode() == ISD::ZERO_EXTEND) &&
N0.getOperand(0).getOpcode() == N->getOpcode()) {
SDValue TruncExtSrc = N0.getOperand(0);
auto *N1C = dyn_cast<ConstantSDNode>(N1);
auto *N001C = dyn_cast<ConstantSDNode>(TruncExtSrc.getOperand(1));
if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) {
SDLoc DL(N);
SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT);
SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT);
return DAG.getNode(ISD::XOR, DL, VT, LHS,
DAG.getNode(ISD::XOR, DL, VT, RHS, N1));
}
}
return combineFneg(N, DAG, DCI, Subtarget);
}
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
unsigned NumBits = VT.getSizeInBits();
// TODO - Constant Folding.
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getAllOnes(NumBits));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
}
static bool isNullFPScalarOrVectorConst(SDValue V) {
return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
}
/// If a value is a scalar FP zero or a vector FP zero (potentially including
/// undefined elements), return a zero constant that may be used to fold away
/// that value. In the case of a vector, the returned constant will not contain
/// undefined elements even if the input parameter does. This makes it suitable
/// to be used as a replacement operand with operations (eg, bitwise-and) where
/// an undef should not propagate.
static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!isNullFPScalarOrVectorConst(V))
return SDValue();
if (V.getValueType().isVector())
return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V));
return V;
}
static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// Vector types are handled in combineANDXORWithAllOnesIntoANDNP().
if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::f64 && Subtarget.hasSSE2()) ||
(VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2())))
return SDValue();
auto isAllOnesConstantFP = [](SDValue V) {
if (V.getSimpleValueType().isVector())
return ISD::isBuildVectorAllOnes(V.getNode());
auto *C = dyn_cast<ConstantFPSDNode>(V);
return C && C->getConstantFPValue()->isAllOnesValue();
};
// fand (fxor X, -1), Y --> fandn X, Y
if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1)))
return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1);
// fand X, (fxor Y, -1) --> fandn Y, X
if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1)))
return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
/// Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// FAND(0.0, x) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))
return V;
// FAND(x, 0.0) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
return V;
if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget))
return V;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FANDN nodes.
static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// FANDN(0.0, x) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(0)))
return N->getOperand(1);
// FANDN(x, 0.0) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
return V;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(0)))
return N->getOperand(1);
// F[X]OR(x, 0.0) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
return N->getOperand(0);
if (SDValue NewVal = combineFneg(N, DAG, DCI, Subtarget))
return NewVal;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
// FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
if (!DAG.getTarget().Options.NoNaNsFPMath ||
!DAG.getTarget().Options.NoSignedZerosFPMath)
return SDValue();
// If we run in unsafe-math mode, then convert the FMIN and FMAX nodes
// into FMINC and FMAXC, which are commutative operations.
unsigned NewOp = 0;
switch (N->getOpcode()) {
default: llvm_unreachable("unknown opcode");
case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
}
return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1));
}
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (Subtarget.useSoftFloat() || isSoftFP16(VT, Subtarget))
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64) ||
(Subtarget.hasFP16() && VT == MVT::f16) ||
(VT.isVector() && TLI.isTypeLegal(VT))))
return SDValue();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDLoc DL(N);
auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;
// If we don't have to respect NaN inputs, this is a direct translation to x86
// min/max instructions.
if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
// If one of the operands is known non-NaN, use the native min/max instructions
// with the non-NaN input as second operand.
if (DAG.isKnownNeverNaN(Op1))
return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
if (DAG.isKnownNeverNaN(Op0))
return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());
// If we have to respect NaN inputs, this takes at least 3 instructions.
// Favor a library call when operating on a scalar and minimizing code size.
if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
VT);
// There are 4 possibilities involving NaN inputs, and these are the required
// outputs:
// Op1
// Num NaN
// ----------------
// Num | Max | Op0 |
// Op0 ----------------
// NaN | Op1 | NaN |
// ----------------
//
// The SSE FP max/min instructions were not designed for this case, but rather
// to implement:
// Min = Op1 < Op0 ? Op1 : Op0
// Max = Op1 > Op0 ? Op1 : Op0
//
// So they always return Op0 if either input is a NaN. However, we can still
// use those instructions for fmaxnum by selecting away a NaN input.
// If either operand is NaN, the 2nd source operand (Op0) is passed through.
SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO);
// If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands
// are NaN, the NaN value of Op1 is the result.
return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
}
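/// Do target-specific dag combines on X86 vector int-to-fp conversion nodes.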
static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
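// Let the generic demanded-elements machinery simplify the input vector.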
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
return SDValue(N, 0);
// Convert a full vector load into vzload when not all bits are needed.
SDValue In = N->getOperand(0);
MVT InVT = In.getSimpleValueType();
if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
assert(InVT.is128BitVector() && "Expected 128-bit input vector");
LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
MVT MemVT = MVT::getIntegerVT(NumBits);
MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
SDLoc dl(N);
SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
DAG.getBitcast(InVT, VZLoad));
DCI.CombineTo(N, Convert);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return SDValue(N, 0);
}
}
return SDValue();
}
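/// Do target-specific dag combines on X86 vector fp-to-int conversion nodes.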
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
bool IsStrict = N->isTargetStrictFPOpcode();
EVT VT = N->getValueType(0);
// Convert a full vector load into vzload when not all bits are needed.
SDValue In = N->getOperand(IsStrict ? 1 : 0);
MVT InVT = In.getSimpleValueType();
if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
assert(InVT.is128BitVector() && "Expected 128-bit input vector");
LoadSDNode *LN = cast<LoadSDNode>(In);
unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
MVT MemVT = MVT::getFloatingPointVT(NumBits);
MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
SDLoc dl(N);
if (IsStrict) {
SDValue Convert =
DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},
{N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});
DCI.CombineTo(N, Convert, Convert.getValue(1));
} else {
SDValue Convert =
DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
DCI.CombineTo(N, Convert);
}
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return SDValue(N, 0);
}
}
return SDValue();
}
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
MVT VT = N->getSimpleValueType(0);
int NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
// ANDNP(undef, x) -> 0
// ANDNP(x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// ANDNP(0, x) -> x
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N1;
// ANDNP(x, 0) -> 0
if (ISD::isBuildVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
// Turn ANDNP back to AND if input is inverted.
if (SDValue Not = IsNOT(N0, DAG))
return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1);
// Constant Folding
APInt Undefs0, Undefs1;
SmallVector<APInt> EltBits0, EltBits1;
if (getTargetConstantBitsFromNode(N0, EltSizeInBits, Undefs0, EltBits0)) {
SDLoc DL(N);
APInt ResultUndefs = APInt::getZero(NumElts);
if (getTargetConstantBitsFromNode(N1, EltSizeInBits, Undefs1, EltBits1)) {
SmallVector<APInt> ResultBits;
for (int I = 0; I != NumElts; ++I)
ResultBits.push_back(~EltBits0[I] & EltBits1[I]);
return getConstVector(ResultBits, ResultUndefs, VT, DAG, DL);
}
// Constant fold NOT(N0) to allow us to use AND.
// Ensure this is only performed if we can confirm that the bitcasted source
// has one use, to prevent an infinite loop with canonicalizeBitSelect.
if (N0->hasOneUse()) {
SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() != ISD::BITCAST) {
for (APInt &Elt : EltBits0)
Elt = ~Elt;
SDValue Not = getConstVector(EltBits0, ResultUndefs, VT, DAG, DL);
return DAG.getNode(ISD::AND, DL, VT, Not, N1);
}
}
}
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
// If either operand is a constant mask, then only the elements that aren't
// zero are actually demanded by the other operand.
auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
APInt UndefElts;
SmallVector<APInt> EltBits;
APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
APInt DemandedElts = APInt::getAllOnes(NumElts);
if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
EltBits)) {
DemandedBits.clearAllBits();
DemandedElts.clearAllBits();
for (int I = 0; I != NumElts; ++I) {
if (UndefElts[I]) {
// We can't assume an undef src element gives an undef dst - the
// other src might be zero.
DemandedBits.setAllBits();
DemandedElts.setBit(I);
} else if ((Invert && !EltBits[I].isAllOnes()) ||
(!Invert && !EltBits[I].isZero())) {
DemandedBits |= Invert ? ~EltBits[I] : EltBits[I];
DemandedElts.setBit(I);
}
}
}
return std::make_pair(DemandedBits, DemandedElts);
};
APInt Bits0, Elts0;
APInt Bits1, Elts1;
std::tie(Bits0, Elts0) = GetDemandedMasks(N1);
std::tie(Bits1, Elts1) = GetDemandedMasks(N0, true);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedVectorElts(N0, Elts0, DCI) ||
TLI.SimplifyDemandedVectorElts(N1, Elts1, DCI) ||
TLI.SimplifyDemandedBits(N0, Bits0, Elts0, DCI) ||
TLI.SimplifyDemandedBits(N1, Bits1, Elts1, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
}
return SDValue();
}
static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
// BT ignores high bits in the bit index operand.
unsigned BitWidth = N1.getValueSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(N1, DemandedMask, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
return SDValue();
}
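/// Do target-specific dag combines on X86ISD::CVTPH2PS / STRICT_CVTPH2PS nodes.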
static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
bool IsStrict = N->getOpcode() == X86ISD::STRICT_CVTPH2PS;
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
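// Only the low 4 of the 8 input elements are converted - simplify the rest.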
APInt DemandedElts = APInt::getLowBitsSet(8, 4);
if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
// Convert a full vector load into vzload when not all bits are needed.
if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(IsStrict ? 1 : 0));
if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::i64, MVT::v2i64, DAG)) {
SDLoc dl(N);
if (IsStrict) {
SDValue Convert = DAG.getNode(
N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
{N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)});
DCI.CombineTo(N, Convert, Convert.getValue(1));
} else {
SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32,
DAG.getBitcast(MVT::v8i16, VZLoad));
DCI.CombineTo(N, Convert);
}
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return SDValue(N, 0);
}
}
}
return SDValue();
}
// Try to combine sext_in_reg of a cmov of constants by extending the constants.
static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
EVT DstVT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
if (ExtraVT != MVT::i8 && ExtraVT != MVT::i16)
return SDValue();
// Look through single use any_extends / truncs.
SDValue IntermediateBitwidthOp;
if ((N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
N0.hasOneUse()) {
IntermediateBitwidthOp = N0;
N0 = N0.getOperand(0);
}
// See if we have a single use cmov.
if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse())
return SDValue();
SDValue CMovOp0 = N0.getOperand(0);
SDValue CMovOp1 = N0.getOperand(1);
// Make sure both operands are constants.
if (!isa<ConstantSDNode>(CMovOp0.getNode()) ||
!isa<ConstantSDNode>(CMovOp1.getNode()))
return SDValue();
SDLoc DL(N);
// If we looked through an any_extend/trunc above, apply the same op to the
// constants.
if (IntermediateBitwidthOp) {
unsigned IntermediateOpc = IntermediateBitwidthOp.getOpcode();
CMovOp0 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp0);
CMovOp1 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp1);
}
CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp0, N1);
CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp1, N1);
EVT CMovVT = DstVT;
// We do not want i16 CMOV's. Promote to i32 and truncate afterwards.
if (DstVT == MVT::i16) {
CMovVT = MVT::i32;
CMovOp0 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp0);
CMovOp1 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp1);
}
SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, CMovVT, CMovOp0, CMovOp1,
N0.getOperand(2), N0.getOperand(3));
if (CMovVT != DstVT)
CMov = DAG.getNode(ISD::TRUNCATE, DL, DstVT, CMov);
return CMov;
}
static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
if (SDValue V = combineSextInRegCmov(N, DAG))
return V;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
SDLoc dl(N);
// A SIGN_EXTEND_INREG to v4i64 is an expensive operation on both SSE and
// AVX2 since there is no sign-extended shift right operation on a vector
// with 64-bit elements.
// (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
//   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND)) {
SDValue N00 = N0.getOperand(0);
// An EXTLOAD has a better solution on AVX2: it may be replaced with an
// X86ISD::VSEXT node.
if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256())
if (!ISD::isNormalLoad(N00.getNode()))
return SDValue();
// Attempt to promote any comparison mask ops before the SIGN_EXTEND_INREG
// gets in the way.
if (SDValue Promote = PromoteMaskArithmetic(N0.getNode(), DAG, Subtarget))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Promote, N1);
if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
SDValue Tmp =
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1);
return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
}
}
return SDValue();
}
/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
/// zext(add_nuw(x, C)) --> add(zext(x), C_zext)
/// Promoting a sign/zero extension ahead of a no overflow 'add' exposes
/// opportunities to combine math ops, use an LEA, or use a complex addressing
/// mode. This can eliminate extend, add, and shift instructions.
static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Ext->getOpcode() != ISD::SIGN_EXTEND &&
Ext->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
// TODO: This should be valid for other integer types.
EVT VT = Ext->getValueType(0);
if (VT != MVT::i64)
return SDValue();
SDValue Add = Ext->getOperand(0);
if (Add.getOpcode() != ISD::ADD)
return SDValue();
bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND;
bool NSW = Add->getFlags().hasNoSignedWrap();
bool NUW = Add->getFlags().hasNoUnsignedWrap();
// We need an 'add nsw' feeding into the 'sext' or 'add nuw' feeding
// into the 'zext'
if ((Sext && !NSW) || (!Sext && !NUW))
return SDValue();
// Having a constant operand to the 'add' ensures that we are not increasing
// the instruction count because the constant is extended for free below.
// A constant operand can also become the displacement field of an LEA.
auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
if (!AddOp1)
return SDValue();
// Don't make the 'add' bigger if there's no hope of combining it with some
// other 'add' or 'shl' instruction.
// TODO: It may be profitable to generate simpler LEA instructions in place
// of single 'add' instructions, but the cost model for selecting an LEA
// currently has a high threshold.
bool HasLEAPotential = false;
for (auto *User : Ext->uses()) {
if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
HasLEAPotential = true;
break;
}
}
if (!HasLEAPotential)
return SDValue();
// Everything looks good, so pull the '{s|z}ext' ahead of the 'add'.
int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue();
SDValue AddOp0 = Add.getOperand(0);
SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0);
SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
// The wider add is guaranteed to not wrap because both operands are
// sign-extended.
SDNodeFlags Flags;
Flags.setNoSignedWrap(NSW);
Flags.setNoUnsignedWrap(NUW);
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
}
// If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant
// operands and the result of CMOV is not used anywhere else - promote CMOV
// itself instead of promoting its result. This could be beneficial, because:
// 1) X86TargetLowering::EmitLoweredSelect later can do merging of two
// (or more) pseudo-CMOVs only when they go one-after-another and
// getting rid of result extension code after CMOV will help that.
// 2) Promotion of constant CMOV arguments is free, hence the
// {ANY,SIGN,ZERO}_EXTEND will just be deleted.
// 3) 16-bit CMOV encoding is 4 bytes, 32-bit CMOV is 3 bytes, so this
// promotion is also good in terms of code-size.
// (64-bit CMOV is 4 bytes, which is why we don't do 32-bit => 64-bit
// promotion).
static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) {
SDValue CMovN = Extend->getOperand(0);
if (CMovN.getOpcode() != X86ISD::CMOV || !CMovN.hasOneUse())
return SDValue();
EVT TargetVT = Extend->getValueType(0);
unsigned ExtendOpcode = Extend->getOpcode();
SDLoc DL(Extend);
EVT VT = CMovN.getValueType();
SDValue CMovOp0 = CMovN.getOperand(0);
SDValue CMovOp1 = CMovN.getOperand(1);
if (!isa<ConstantSDNode>(CMovOp0.getNode()) ||
!isa<ConstantSDNode>(CMovOp1.getNode()))
return SDValue();
// Only extend to i32 or i64.
if (TargetVT != MVT::i32 && TargetVT != MVT::i64)
return SDValue();
// Only extend from i16 unless it's a sign_extend from i32. Zext/aext from i32
// are free.
if (VT != MVT::i16 && !(ExtendOpcode == ISD::SIGN_EXTEND && VT == MVT::i32))
return SDValue();
// If this is a zero extend to i64, we should only extend to i32 and use a free
// zero extend to finish.
EVT ExtendVT = TargetVT;
if (TargetVT == MVT::i64 && ExtendOpcode != ISD::SIGN_EXTEND)
ExtendVT = MVT::i32;
CMovOp0 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp0);
CMovOp1 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp1);
SDValue Res = DAG.getNode(X86ISD::CMOV, DL, ExtendVT, CMovOp0, CMovOp1,
CMovN.getOperand(2), CMovN.getOperand(3));
// Finish extending if needed.
if (ExtendVT != TargetVT)
Res = DAG.getNode(ExtendOpcode, DL, TargetVT, Res);
return Res;
}
// Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm
// result type.
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc dl(N);
// Only do this combine with AVX512 for vector extends.
if (!Subtarget.hasAVX512() || !VT.isVector() || N0.getOpcode() != ISD::SETCC)
return SDValue();
// Only combine legal element types.
EVT SVT = VT.getVectorElementType();
if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
return SDValue();
// We don't have a CMPP instruction for vXf16.
if (N0.getOperand(0).getValueType().getVectorElementType() == MVT::f16)
return SDValue();
// We can only do this if the vector size is 256 bits or less.
unsigned Size = VT.getSizeInBits();
if (Size > 256 && Subtarget.useAVX512Regs())
return SDValue();
// Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since
// those are the only integer compares we have.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
if (ISD::isUnsignedIntSetCC(CC))
return SDValue();
// Only do this combine if the extension will be fully consumed by the setcc.
EVT N00VT = N0.getOperand(0).getValueType();
EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
if (Size != MatchingVecType.getSizeInBits())
return SDValue();
SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
if (N->getOpcode() == ISD::ZERO_EXTEND)
Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType());
return Res;
}
static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
if (!DCI.isBeforeLegalizeOps() &&
N0.getOpcode() == X86ISD::SETCC_CARRY) {
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
N0->getOperand(1));
bool ReplaceOtherUses = !N0.hasOneUse();
DCI.CombineTo(N, Setcc);
// Replace other uses with a truncate of the widened setcc_carry.
if (ReplaceOtherUses) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), Setcc);
DCI.CombineTo(N0.getNode(), Trunc);
}
return SDValue(N, 0);
}
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
return V;
if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), DL, VT, N0,
DAG, DCI, Subtarget))
return V;
if (VT.isVector()) {
if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
}
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
return SDValue();
}
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode() || N->isTargetStrictFPOpcode();
// Let legalize expand this if it isn't a legal type yet.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(VT))
return SDValue();
SDValue A = N->getOperand(IsStrict ? 1 : 0);
SDValue B = N->getOperand(IsStrict ? 2 : 1);
SDValue C = N->getOperand(IsStrict ? 3 : 2);
// If the operation allows fast-math and the target does not support FMA,
// split this into mul+add to avoid libcall(s).
SDNodeFlags Flags = N->getFlags();
if (!IsStrict && Flags.hasAllowReassociation() &&
TLI.isOperationExpand(ISD::FMA, VT)) {
SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags);
return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags);
}
EVT ScalarVT = VT.getScalarType();
if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
!Subtarget.hasAnyFMA()) &&
!(ScalarVT == MVT::f16 && Subtarget.hasFP16()))
return SDValue();
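// Returns true, and replaces V with its negation, if a cheaper negated form
// exists (also looks through an extract_vector_elt of index 0).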
auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
bool LegalOperations = !DCI.isBeforeLegalizeOps();
if (SDValue NegV = TLI.getCheaperNegatedExpression(V, DAG, LegalOperations,
CodeSize)) {
V = NegV;
return true;
}
// Look through extract_vector_elts. If it comes from an FNEG, create a
// new extract from the FNEG input.
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(V.getOperand(1))) {
SDValue Vec = V.getOperand(0);
if (SDValue NegV = TLI.getCheaperNegatedExpression(
Vec, DAG, LegalOperations, CodeSize)) {
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
NegV, V.getOperand(1));
return true;
}
}
return false;
};
// Do not convert the passthru input of scalar intrinsics.
// FIXME: We could allow negations of the lower element only.
bool NegA = invertIfNegative(A);
bool NegB = invertIfNegative(B);
bool NegC = invertIfNegative(C);
if (!NegA && !NegB && !NegC)
return SDValue();
unsigned NewOpcode =
negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false);
// Propagate fast-math-flags to new FMA node.
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
if (IsStrict) {
assert(N->getNumOperands() == 4 && "Shouldn't be greater than 4");
return DAG.getNode(NewOpcode, dl, {VT, MVT::Other},
{N->getOperand(0), A, B, C});
} else {
if (N->getNumOperands() == 4)
return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
return DAG.getNode(NewOpcode, dl, VT, A, B, C);
}
}
// Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C)
// Combine FMSUBADD(A, B, FNEG(C)) -> FMADDSUB(A, B, C)
static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
bool LegalOperations = !DCI.isBeforeLegalizeOps();
SDValue N2 = N->getOperand(2);
SDValue NegN2 =
TLI.getCheaperNegatedExpression(N2, DAG, LegalOperations, CodeSize);
if (!NegN2)
return SDValue();
unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false);
if (N->getNumOperands() == 4)
return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),
NegN2, N->getOperand(3));
return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),
NegN2);
}
static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// (i32 (aext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
// FIXME: Is this needed? We don't seem to have any tests for it.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
N0.getOpcode() == X86ISD::SETCC_CARRY) {
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
N0->getOperand(1));
bool ReplaceOtherUses = !N0.hasOneUse();
DCI.CombineTo(N, Setcc);
// Replace other uses with a truncate of the widened setcc_carry.
if (ReplaceOtherUses) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), Setcc);
DCI.CombineTo(N0.getNode(), Trunc);
}
return SDValue(N, 0);
}
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;
if (DCI.isBeforeLegalizeOps())
if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
return V;
if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), dl, VT, N0,
DAG, DCI, Subtarget))
return V;
if (VT.isVector())
if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
return R;
// TODO: Combine with any target/faux shuffle.
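// zext(packus(a,b)) - if the upper half of each packed source element is
// already known zero, the zero-extended result is just the concatenation of
// the two sources.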
if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 &&
VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
unsigned NumSrcEltBits = N00.getScalarValueSizeInBits();
APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2);
if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) &&
(N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) {
return concatSubVectors(N00, N01, DAG, dl);
}
}
return SDValue();
}
/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
/// recognizable memcmp expansion.
static bool isOrXorXorTree(SDValue X, bool Root = true) {
if (X.getOpcode() == ISD::OR)
return isOrXorXorTree(X.getOperand(0), false) &&
isOrXorXorTree(X.getOperand(1), false);
if (Root)
return false;
return X.getOpcode() == ISD::XOR;
}
/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
/// expansion.
template <typename F>
static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG,
EVT VecVT, EVT CmpVT, bool HasPT, F SToV) {
SDValue Op0 = X.getOperand(0);
SDValue Op1 = X.getOperand(1);
if (X.getOpcode() == ISD::OR) {
SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT, HasPT, SToV);
SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT, HasPT, SToV);
if (VecVT != CmpVT)
return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
if (HasPT)
return DAG.getNode(ISD::OR, DL, VecVT, A, B);
return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
}
if (X.getOpcode() == ISD::XOR) {
SDValue A = SToV(Op0);
SDValue B = SToV(Op1);
if (VecVT != CmpVT)
return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
if (HasPT)
return DAG.getNode(ISD::XOR, DL, VecVT, A, B);
return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
}
llvm_unreachable("Impossible");
}
/// Try to map a 128-bit or larger integer comparison to vector instructions
/// before type legalization splits it up into chunks.
static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
// We're looking for an oversized integer equality comparison.
SDValue X = SetCC->getOperand(0);
SDValue Y = SetCC->getOperand(1);
EVT OpVT = X.getValueType();
unsigned OpSize = OpVT.getSizeInBits();
if (!OpVT.isScalarInteger() || OpSize < 128)
return SDValue();
// Ignore a comparison with zero because that gets special treatment in
// EmitTest(). But make an exception for the special case of a pair of
// logically-combined vector-sized operands compared to zero. This pattern may
// be generated by the memcmp expansion pass with oversized integer compares
// (see PR33325).
bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
return SDValue();
// Don't perform this combine if constructing the vector will be expensive.
auto IsVectorBitCastCheap = [](SDValue X) {
X = peekThroughBitcasts(X);
return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
X.getOpcode() == ISD::LOAD;
};
if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
!IsOrXorXorTreeCCZero)
return SDValue();
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
// Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
// Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
bool NoImplicitFloatOps =
DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && Subtarget.hasAVX()) ||
(OpSize == 512 && Subtarget.useAVX512Regs()))) {
bool HasPT = Subtarget.hasSSE41();
// PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
// vector registers are essentially free. (Technically, widening registers
// prevents load folding, but the tradeoff is worth it.)
bool PreferKOT = Subtarget.preferMaskRegisters();
bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512;
EVT VecVT = MVT::v16i8;
EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT;
if (OpSize == 256) {
VecVT = MVT::v32i8;
CmpVT = PreferKOT ? MVT::v32i1 : VecVT;
}
EVT CastVT = VecVT;
bool NeedsAVX512FCast = false;
if (OpSize == 512 || NeedZExt) {
if (Subtarget.hasBWI()) {
VecVT = MVT::v64i8;
CmpVT = MVT::v64i1;
if (OpSize == 512)
CastVT = VecVT;
} else {
VecVT = MVT::v16i32;
CmpVT = MVT::v16i1;
CastVT = OpSize == 512 ? VecVT :
OpSize == 256 ? MVT::v8i32 : MVT::v4i32;
NeedsAVX512FCast = true;
}
}
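// Bitcast the scalar operand into the chosen vector compare type, looking
// through zero-extends of narrower scalars and widening with zeros if needed.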
auto ScalarToVector = [&](SDValue X) -> SDValue {
bool TmpZext = false;
EVT TmpCastVT = CastVT;
if (X.getOpcode() == ISD::ZERO_EXTEND) {
SDValue OrigX = X.getOperand(0);
unsigned OrigSize = OrigX.getScalarValueSizeInBits();
if (OrigSize < OpSize) {
if (OrigSize == 128) {
TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8;
X = OrigX;
TmpZext = true;
} else if (OrigSize == 256) {
TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8;
X = OrigX;
TmpZext = true;
}
}
}
X = DAG.getBitcast(TmpCastVT, X);
if (!NeedZExt && !TmpZext)
return X;
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT,
DAG.getConstant(0, DL, VecVT), X,
DAG.getVectorIdxConstant(0, DL));
};
SDValue Cmp;
if (IsOrXorXorTreeCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
// setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne
// Use 2 vector equality compares and 'and' the results before doing a
// MOVMSK.
Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector);
} else {
SDValue VecX = ScalarToVector(X);
SDValue VecY = ScalarToVector(Y);
if (VecVT != CmpVT) {
Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
} else if (HasPT) {
Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY);
} else {
Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
}
}
// AVX512 should emit a setcc that will lower to kortest.
if (VecVT != CmpVT) {
EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 :
CmpVT == MVT::v32i1 ? MVT::i32 : MVT::i16;
return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp),
DAG.getConstant(0, DL, KRegVT), CC);
}
if (HasPT) {
SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64,
Cmp);
SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp);
X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
SDValue X86SetCC = getSETCC(X86CC, PT, DL, DAG);
return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0));
}
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
assert(Cmp.getValueType() == MVT::v16i8 &&
"Non 128-bit vector on pre-SSE41 target");
SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
SDValue FFFFs = DAG.getConstant(0xFFFF, DL, MVT::i32);
return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
}
return SDValue();
}
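/// Do target-specific dag combines on ISD::SETCC nodes.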
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
const SDValue LHS = N->getOperand(0);
const SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT OpVT = LHS.getValueType();
SDLoc DL(N);
if (CC == ISD::SETNE || CC == ISD::SETEQ) {
if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
return V;
if (VT == MVT::i1 && isNullConstant(RHS)) {
SDValue X86CC;
if (SDValue V =
MatchVectorAllZeroTest(LHS, CC, DL, Subtarget, DAG, X86CC))
return DAG.getNode(ISD::TRUNCATE, DL, VT,
DAG.getNode(X86ISD::SETCC, DL, MVT::i8, X86CC, V));
}
if (OpVT.isScalarInteger()) {
// cmpeq(or(X,Y),X) --> cmpeq(and(~X,Y),0)
// cmpne(or(X,Y),X) --> cmpne(and(~X,Y),0)
auto MatchOrCmpEq = [&](SDValue N0, SDValue N1) {
if (N0.getOpcode() == ISD::OR && N0->hasOneUse()) {
if (N0.getOperand(0) == N1)
return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT),
N0.getOperand(1));
if (N0.getOperand(1) == N1)
return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT),
N0.getOperand(0));
}
return SDValue();
};
if (SDValue AndN = MatchOrCmpEq(LHS, RHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
if (SDValue AndN = MatchOrCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
// cmpeq(and(X,Y),Y) --> cmpeq(and(~X,Y),0)
// cmpne(and(X,Y),Y) --> cmpne(and(~X,Y),0)
auto MatchAndCmpEq = [&](SDValue N0, SDValue N1) {
if (N0.getOpcode() == ISD::AND && N0->hasOneUse()) {
if (N0.getOperand(0) == N1)
return DAG.getNode(ISD::AND, DL, OpVT, N1,
DAG.getNOT(DL, N0.getOperand(1), OpVT));
if (N0.getOperand(1) == N1)
return DAG.getNode(ISD::AND, DL, OpVT, N1,
DAG.getNOT(DL, N0.getOperand(0), OpVT));
}
return SDValue();
};
if (SDValue AndN = MatchAndCmpEq(LHS, RHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
// cmpeq(trunc(x),0) --> cmpeq(x,0)
// cmpne(trunc(x),0) --> cmpne(x,0)
// iff x upper bits are zero.
// TODO: Add support for RHS to be truncate as well?
if (LHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getScalarValueSizeInBits() >= 32 &&
isNullConstant(RHS) && !DCI.isBeforeLegalize()) {
EVT SrcVT = LHS.getOperand(0).getValueType();
APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
OpVT.getScalarSizeInBits());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (DAG.MaskedValueIsZero(LHS.getOperand(0), UpperBits) &&
TLI.isTypeLegal(LHS.getOperand(0).getValueType()))
return DAG.getSetCC(DL, VT, LHS.getOperand(0),
DAG.getConstant(0, DL, SrcVT), CC);
}
}
}
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
// Using temporaries to avoid messing up operand ordering for later
// transformations if this doesn't work.
SDValue Op0 = LHS;
SDValue Op1 = RHS;
ISD::CondCode TmpCC = CC;
// Put build_vector on the right.
if (Op0.getOpcode() == ISD::BUILD_VECTOR) {
std::swap(Op0, Op1);
TmpCC = ISD::getSetCCSwappedOperands(TmpCC);
}
bool IsSEXT0 =
(Op0.getOpcode() == ISD::SIGN_EXTEND) &&
(Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1);
bool IsVZero1 = ISD::isBuildVectorAllZeros(Op1.getNode());
if (IsSEXT0 && IsVZero1) {
assert(VT == Op0.getOperand(0).getValueType() &&
"Unexpected operand type");
if (TmpCC == ISD::SETGT)
return DAG.getConstant(0, DL, VT);
if (TmpCC == ISD::SETLE)
return DAG.getConstant(1, DL, VT);
if (TmpCC == ISD::SETEQ || TmpCC == ISD::SETGE)
return DAG.getNOT(DL, Op0.getOperand(0), VT);
assert((TmpCC == ISD::SETNE || TmpCC == ISD::SETLT) &&
"Unexpected condition code!");
return Op0.getOperand(0);
}
}
// If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
// pre-promote its result type since vXi1 vectors don't get promoted
// during type legalization.
// NOTE: The element count check is to ignore operand types that need to
// go through type promotion to a 128-bit vector.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
}
// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
// to avoid scalarization via legalization because v4i32 is not a legal type.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
LHS.getValueType() == MVT::v4f32)
return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);
// X pred 0.0 --> X pred -X
// If the negation of X already exists, use it in the comparison. This removes
// the need to materialize 0.0 and allows matching to SSE's MIN/MAX
// instructions in patterns with a 'select' node.
if (isNullFPScalarOrVectorConst(RHS)) {
SDVTList FNegVT = DAG.getVTList(OpVT);
if (SDNode *FNeg = DAG.getNodeIfExists(ISD::FNEG, FNegVT, {LHS}))
return DAG.getSetCC(DL, VT, LHS, SDValue(FNeg, 0), CC);
}
return SDValue();
}
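/// Do target-specific dag combines on X86ISD::MOVMSK nodes.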
static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = N->getSimpleValueType(0);
unsigned NumBits = VT.getScalarSizeInBits();
unsigned NumElts = SrcVT.getVectorNumElements();
unsigned NumBitsPerElt = SrcVT.getScalarSizeInBits();
assert(VT == MVT::i32 && NumElts <= NumBits && "Unexpected MOVMSK types");
// Perform constant folding.
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
if (getTargetConstantBitsFromNode(Src, NumBitsPerElt, UndefElts, EltBits)) {
APInt Imm(32, 0);
for (unsigned Idx = 0; Idx != NumElts; ++Idx)
if (!UndefElts[Idx] && EltBits[Idx].isNegative())
Imm.setBit(Idx);
return DAG.getConstant(Imm, SDLoc(N), VT);
}
// Look through int->fp bitcasts that don't change the element width.
unsigned EltWidth = SrcVT.getScalarSizeInBits();
if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST &&
Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0));
// Fold movmsk(not(x)) -> not(movmsk(x)) to improve folding of movmsk results
// with scalar comparisons.
if (SDValue NotSrc = IsNOT(Src, DAG)) {
SDLoc DL(N);
APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts);
NotSrc = DAG.getBitcast(SrcVT, NotSrc);
return DAG.getNode(ISD::XOR, DL, VT,
DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc),
DAG.getConstant(NotMask, DL, VT));
}
// Fold movmsk(icmp_sgt(x,-1)) -> not(movmsk(x)) to improve folding of movmsk
// results with scalar comparisons.
if (Src.getOpcode() == X86ISD::PCMPGT &&
ISD::isBuildVectorAllOnes(Src.getOperand(1).getNode())) {
SDLoc DL(N);
APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts);
return DAG.getNode(ISD::XOR, DL, VT,
DAG.getNode(X86ISD::MOVMSK, DL, VT, Src.getOperand(0)),
DAG.getConstant(NotMask, DL, VT));
}
// Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))
// iff pow2splat(c1).
if (Src.getOpcode() == X86ISD::PCMPEQ &&
Src.getOperand(0).getOpcode() == ISD::AND &&
ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
SDValue LHS = Src.getOperand(0).getOperand(0);
SDValue RHS = Src.getOperand(0).getOperand(1);
KnownBits KnownRHS = DAG.computeKnownBits(RHS);
if (KnownRHS.isConstant() && KnownRHS.getConstant().isPowerOf2()) {
SDLoc DL(N);
MVT ShiftVT = SrcVT;
if (ShiftVT.getScalarType() == MVT::i8) {
// vXi8 shifts - we only care about the signbit so can use PSLLW.
ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
LHS = DAG.getBitcast(ShiftVT, LHS);
}
unsigned ShiftAmt = KnownRHS.getConstant().countLeadingZeros();
LHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, LHS,
ShiftAmt, DAG);
LHS = DAG.getNOT(DL, DAG.getBitcast(SrcVT, LHS), SrcVT);
return DAG.getNode(X86ISD::MOVMSK, DL, VT, LHS);
}
}
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getAllOnes(NumBits));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
}
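/// Do target-specific dag combines on X86 masked gather/scatter nodes.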
static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
auto *MemOp = cast<X86MaskedGatherScatterSDNode>(N);
SDValue BasePtr = MemOp->getBasePtr();
SDValue Index = MemOp->getIndex();
SDValue Scale = MemOp->getScale();
SDValue Mask = MemOp->getMask();
// Attempt to fold an index scale into the scale value directly.
// For smaller indices, implicit sext is performed BEFORE scale, preventing
// this fold under most circumstances.
// TODO: Move this into X86DAGToDAGISel::matchVectorAddressRecursively?
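// e.g. index = (x << 1) with scale 4 becomes index = x with scale 8, as long
// as the new scale is still a power of 2 no larger than 8.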
if ((Index.getOpcode() == X86ISD::VSHLI ||
(Index.getOpcode() == ISD::ADD &&
Index.getOperand(0) == Index.getOperand(1))) &&
isa<ConstantSDNode>(Scale) &&
BasePtr.getScalarValueSizeInBits() == Index.getScalarValueSizeInBits()) {
unsigned ShiftAmt =
Index.getOpcode() == ISD::ADD ? 1 : Index.getConstantOperandVal(1);
uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
uint64_t NewScaleAmt = ScaleAmt * (1ULL << ShiftAmt);
if (isPowerOf2_64(NewScaleAmt) && NewScaleAmt <= 8) {
SDValue NewIndex = Index.getOperand(0);
SDValue NewScale =
DAG.getTargetConstant(NewScaleAmt, SDLoc(N), Scale.getValueType());
if (N->getOpcode() == X86ISD::MGATHER)
return getAVX2GatherNode(N->getOpcode(), SDValue(N, 0), DAG,
MemOp->getOperand(1), Mask,
MemOp->getBasePtr(), NewIndex, NewScale,
MemOp->getChain(), Subtarget);
if (N->getOpcode() == X86ISD::MSCATTER)
return getScatterNode(N->getOpcode(), SDValue(N, 0), DAG,
MemOp->getOperand(1), Mask, MemOp->getBasePtr(),
NewIndex, NewScale, MemOp->getChain(), Subtarget);
}
}
// With vector masks we only demand the upper bit of the mask.
if (Mask.getScalarValueSizeInBits() != 1) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
}
return SDValue();
}
static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS,
SDValue Index, SDValue Base, SDValue Scale,
SelectionDAG &DAG) {
SDLoc DL(GorS);
if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(),
Gather->getMask(), Base, Index, Scale };
return DAG.getMaskedGather(Gather->getVTList(),
Gather->getMemoryVT(), DL, Ops,
Gather->getMemOperand(),
Gather->getIndexType(),
Gather->getExtensionType());
}
auto *Scatter = cast<MaskedScatterSDNode>(GorS);
SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),
Scatter->getMask(), Base, Index, Scale };
return DAG.getMaskedScatter(Scatter->getVTList(),
Scatter->getMemoryVT(), DL,
Ops, Scatter->getMemOperand(),
Scatter->getIndexType(),
Scatter->isTruncatingStore());
}
static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDLoc DL(N);
auto *GorS = cast<MaskedGatherScatterSDNode>(N);
SDValue Index = GorS->getIndex();
SDValue Base = GorS->getBasePtr();
SDValue Scale = GorS->getScale();
if (DCI.isBeforeLegalize()) {
unsigned IndexWidth = Index.getScalarValueSizeInBits();
// Shrink constant indices if they are larger than 32-bits.
// Only do this before legalize types since v2i64 could become v2i32.
// FIXME: We could check that the type is legal if we're after legalize
// types, but then we would need to construct test cases where that happens.
// FIXME: We could support more than just constant vectors, but we need to be
// careful with costing. A truncate that can be optimized out would be fine.
// Otherwise we might only want to create a truncate if it avoids a split.
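// e.g. a v2i64 constant index whose values sign-extend from 32 bits can be
// truncated to v2i32, halving the index register width.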
if (auto *BV = dyn_cast<BuildVectorSDNode>(Index)) {
if (BV->isConstant() && IndexWidth > 32 &&
DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {
EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32);
Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index);
return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
}
}
// Shrink any sign/zero extends from 32 or smaller to larger than 32 if
// there are sufficient sign bits. Only do this before legalize types to
// avoid creating illegal types in truncate.
if ((Index.getOpcode() == ISD::SIGN_EXTEND ||
Index.getOpcode() == ISD::ZERO_EXTEND) &&
IndexWidth > 32 &&
Index.getOperand(0).getScalarValueSizeInBits() <= 32 &&
DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {
EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32);
Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index);
return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
}
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// Try to move splat constant adders from the index operand to the base
// pointer operand, taking care to multiply by the scale. We can only do
// this when the index element type is the same as the pointer type.
// Otherwise we need to be sure the math doesn't wrap before the scale.
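// e.g. gather(base, index = add(x, splat(C)), scale) becomes
// gather(base + C * scale, index = x, scale).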
if (Index.getOpcode() == ISD::ADD &&
Index.getValueType().getVectorElementType() == PtrVT &&
isa<ConstantSDNode>(Scale)) {
uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
if (auto *BV = dyn_cast<BuildVectorSDNode>(Index.getOperand(1))) {
BitVector UndefElts;
if (ConstantSDNode *C = BV->getConstantSplatNode(&UndefElts)) {
// FIXME: Allow non-constant?
if (UndefElts.none()) {
// Apply the scale.
APInt Adder = C->getAPIntValue() * ScaleAmt;
// Add it to the existing base.
Base = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
DAG.getConstant(Adder, DL, PtrVT));
Index = Index.getOperand(0);
return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
}
}
// It's also possible base is just a constant. In that case, just
// replace it with 0 and move the displacement into the index.
if (BV->isConstant() && isa<ConstantSDNode>(Base) &&
isOneConstant(Scale)) {
SDValue Splat = DAG.getSplatBuildVector(Index.getValueType(), DL, Base);
// Combine the constant build_vector and the constant base.
Splat = DAG.getNode(ISD::ADD, DL, Index.getValueType(),
Index.getOperand(1), Splat);
// Add to the LHS of the original Index add.
Index = DAG.getNode(ISD::ADD, DL, Index.getValueType(),
Index.getOperand(0), Splat);
Base = DAG.getConstant(0, DL, Base.getValueType());
return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
}
}
}
if (DCI.isBeforeLegalizeOps()) {
unsigned IndexWidth = Index.getScalarValueSizeInBits();
// Make sure the index is either i32 or i64
if (IndexWidth != 32 && IndexWidth != 64) {
MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32;
EVT IndexVT = Index.getValueType().changeVectorElementType(EltVT);
Index = DAG.getSExtOrTrunc(Index, DL, IndexVT);
return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
}
}
// With vector masks we only demand the upper bit of the mask.
SDValue Mask = GorS->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
}
return SDValue();
}
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
// Try to simplify the EFLAGS and condition code operands.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget))
return getSETCC(CC, Flags, DL, DAG);
return SDValue();
}
/// Optimize branch condition evaluation.
static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
// Try to simplify the EFLAGS and condition code operands.
// Make sure to not keep references to operands, as combineSetCCEFLAGS can
// RAUW them under us.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) {
SDValue Cond = DAG.getTargetConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
N->getOperand(1), Cond, Flags);
}
return SDValue();
}
// TODO: Could we move this to DAGCombine?
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
SelectionDAG &DAG) {
// Take advantage of vector comparisons (etc.) producing 0 or -1 in each lane
// to optimize away the operation when it is applied to a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
// AND(VECTOR_CMP(x,y), constant2)
// constant2 = UNARYOP(constant)
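// e.g. sint_to_fp(and(vector_cmp(x,y), splat(1))) -->
// and(vector_cmp(x,y), bitcast(splat(1.0))), yielding 0.0 or 1.0 per lane.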
// Early exit if this isn't a vector operation, the operand of the
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
EVT VT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
unsigned NumEltBits = VT.getScalarSizeInBits();
SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
if (!VT.isVector() || Op0.getOpcode() != ISD::AND ||
DAG.ComputeNumSignBits(Op0.getOperand(0)) != NumEltBits ||
VT.getSizeInBits() != Op0.getValueSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (auto *BV = dyn_cast<BuildVectorSDNode>(Op0.getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
SDLoc DL(N);
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
SDValue SourceConst;
if (IsStrict)
SourceConst = DAG.getNode(N->getOpcode(), DL, {VT, MVT::Other},
{N->getOperand(0), SDValue(BV, 0)});
else
SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0),
MaskConst);
SDValue Res = DAG.getBitcast(VT, NewAnd);
if (IsStrict)
return DAG.getMergeValues({Res, SourceConst.getValue(1)}, DL);
return Res;
}
return SDValue();
}
/// If we are converting a value to floating-point, try to replace scalar
/// truncate of an extracted vector element with a bitcast. This tries to keep
/// the sequence on XMM registers rather than moving between vector and GPRs.
static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) {
// TODO: This is currently only used by combineSIntToFP, but it is generalized
// to allow being called by any similar cast opcode.
// TODO: Consider merging this into lowering: vectorizeExtractedCast().
SDValue Trunc = N->getOperand(0);
if (!Trunc.hasOneUse() || Trunc.getOpcode() != ISD::TRUNCATE)
return SDValue();
SDValue ExtElt = Trunc.getOperand(0);
if (!ExtElt.hasOneUse() || ExtElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isNullConstant(ExtElt.getOperand(1)))
return SDValue();
EVT TruncVT = Trunc.getValueType();
EVT SrcVT = ExtElt.getValueType();
unsigned DestWidth = TruncVT.getSizeInBits();
unsigned SrcWidth = SrcVT.getSizeInBits();
if (SrcWidth % DestWidth != 0)
return SDValue();
// inttofp (trunc (extelt X, 0)) --> inttofp (extelt (bitcast X), 0)
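// e.g. for trunc i64->i32 of (extelt v2i64 X, 0), bitcast X to v4i32 and
// extract element 0, which holds the low 32 bits on little-endian x86.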
EVT SrcVecVT = ExtElt.getOperand(0).getValueType();
unsigned VecWidth = SrcVecVT.getSizeInBits();
unsigned NumElts = VecWidth / DestWidth;
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, NumElts);
SDValue BitcastVec = DAG.getBitcast(BitcastVT, ExtElt.getOperand(0));
SDLoc DL(N);
SDValue NewExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TruncVT,
BitcastVec, ExtElt.getOperand(1));
return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt);
}
static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
// UINT_TO_FP(vXi1~15) -> UINT_TO_FP(ZEXT(vXi1~15 to vXi16))
// UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
// UINT_TO_FP(vXi33~63) -> UINT_TO_FP(ZEXT(vXi33~63 to vXi64))
if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
unsigned ScalarSize = InVT.getScalarSizeInBits();
if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
return SDValue();
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
ScalarSize < 16 ? MVT::i16
: ScalarSize < 32 ? MVT::i32
: MVT::i64,
InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
if (IsStrict)
return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other},
{N->getOperand(0), P});
return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
}
// UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
VT.getScalarType() != MVT::f16) {
SDLoc dl(N);
EVT DstVT = InVT.changeVectorElementType(MVT::i32);
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
// UINT_TO_FP isn't legal without AVX512 so use SINT_TO_FP.
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
{N->getOperand(0), P});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(Op0)) {
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other},
{N->getOperand(0), Op0});
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
}
return SDValue();
}
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
bool IsStrict = N->isStrictFPOpcode();
if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG))
return Res;
// Now move on to more general possibilities.
SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
// SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16))
// SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
// SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))
if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
unsigned ScalarSize = InVT.getScalarSizeInBits();
if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
return SDValue();
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
ScalarSize < 16 ? MVT::i16
: ScalarSize < 32 ? MVT::i32
: MVT::i64,
InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
{N->getOperand(0), P});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
VT.getScalarType() != MVT::f16) {
SDLoc dl(N);
EVT DstVT = InVT.changeVectorElementType(MVT::i32);
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
{N->getOperand(0), P});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Without AVX512DQ we only support i64 to float scalar conversion. For both
// vectors and scalars, see if we know that the upper bits are all the sign
// bit, in which case we can truncate the input to i32 and convert from that.
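// e.g. if the input is sext from i32 to i64 there are at least 33 sign bits,
// so converting from the truncated i32 value gives the same result.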
if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) {
unsigned BitWidth = InVT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
if (NumSignBits >= (BitWidth - 31)) {
EVT TruncVT = MVT::i32;
if (InVT.isVector())
TruncVT = InVT.changeVectorElementType(TruncVT);
SDLoc dl(N);
if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
{N->getOperand(0), Trunc});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
}
// If we're after legalize and the type is v2i32 we need to shuffle and
// use CVTSI2P.
assert(InVT == MVT::v2i64 && "Unexpected VT!");
SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
{ 0, 2, -1, -1 });
if (IsStrict)
return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
{N->getOperand(0), Shuf});
return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);
}
}
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
// This transformation is not supported if the result type is f16 or f128.
if (VT == MVT::f16 || VT == MVT::f128)
return SDValue();
// If we have AVX512DQ we can use packed conversion instructions unless
// the VT is f80.
if (Subtarget.hasDQI() && VT != MVT::f80)
return SDValue();
if (Ld->isSimple() && !VT.isVector() && ISD::isNormalLoad(Op0.getNode()) &&
Op0.hasOneUse() && !Subtarget.is64Bit() && InVT == MVT::i64) {
std::pair<SDValue, SDValue> Tmp =
Subtarget.getTargetLowering()->BuildFILD(
VT, InVT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getOriginalAlign(), DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Tmp.second);
return Tmp.first;
}
}
if (IsStrict)
return SDValue();
if (SDValue V = combineToFPTruncExtElt(N, DAG))
return V;
return SDValue();
}
static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
for (const SDNode *User : Flags->uses()) {
X86::CondCode CC;
switch (User->getOpcode()) {
default:
// Be conservative.
return true;
case X86ISD::SETCC:
case X86ISD::SETCC_CARRY:
CC = (X86::CondCode)User->getConstantOperandVal(0);
break;
case X86ISD::BRCOND:
case X86ISD::CMOV:
CC = (X86::CondCode)User->getConstantOperandVal(2);
break;
}
switch (CC) {
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
case X86::COND_O: case X86::COND_NO:
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
return true;
}
}
return false;
}
static bool onlyZeroFlagUsed(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
for (const SDNode *User : Flags->uses()) {
unsigned CCOpNo;
switch (User->getOpcode()) {
default:
// Be conservative.
return false;
case X86ISD::SETCC:
case X86ISD::SETCC_CARRY:
CCOpNo = 0;
break;
case X86ISD::BRCOND:
case X86ISD::CMOV:
CCOpNo = 2;
break;
}
X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo);
if (CC != X86::COND_E && CC != X86::COND_NE)
return false;
}
return true;
}
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
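/// e.g. add X, (and (srl Y, 9), 1) --> adc X, 0, EFLAGS from (bt Y, 9).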
static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
SDValue X, SDValue Y,
SelectionDAG &DAG,
bool ZeroSecondOpOnly = false) {
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
// Look through a one-use zext.
if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse())
Y = Y.getOperand(0);
X86::CondCode CC;
SDValue EFLAGS;
if (Y.getOpcode() == X86ISD::SETCC && Y.hasOneUse()) {
CC = (X86::CondCode)Y.getConstantOperandVal(0);
EFLAGS = Y.getOperand(1);
} else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1)) &&
Y.hasOneUse()) {
EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC);
}
if (!EFLAGS)
return SDValue();
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
auto *ConstantX = dyn_cast<ConstantSDNode>(X);
if (ConstantX && !ZeroSecondOpOnly) {
if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
(IsSub && CC == X86::COND_B && ConstantX->isZero())) {
// This is a complicated way to get -1 or 0 from the carry flag:
// -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
// 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
EFLAGS);
}
if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) ||
(IsSub && CC == X86::COND_A && ConstantX->isZero())) {
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
// Swap the operands of a SUB, and we have the same pattern as above.
// -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
// 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB
SDValue NewSub = DAG.getNode(
X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
NewEFLAGS);
}
}
}
if (CC == X86::COND_B) {
// X + SETB Z --> adc X, 0
// X - SETB Z --> sbb X, 0
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
DAG.getVTList(VT, MVT::i32), X,
DAG.getConstant(0, DL, VT), EFLAGS);
}
if (ZeroSecondOpOnly)
return SDValue();
if (CC == X86::COND_A) {
// Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub =
DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
DAG.getVTList(VT, MVT::i32), X,
DAG.getConstant(0, DL, VT), NewEFLAGS);
}
}
if (CC == X86::COND_AE) {
// X + SETAE --> sbb X, -1
// X - SETAE --> adc X, -1
return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
DAG.getVTList(VT, MVT::i32), X,
DAG.getConstant(-1, DL, VT), EFLAGS);
}
if (CC == X86::COND_BE) {
// X + SETBE --> sbb X, -1
// X - SETBE --> adc X, -1
// Try to convert COND_BE into COND_AE in an attempt to facilitate
// materializing "setae reg".
//
// Do not flip "e <= c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub =
DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
DAG.getVTList(VT, MVT::i32), X,
DAG.getConstant(-1, DL, VT), NewEFLAGS);
}
}
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
if (EFLAGS.getOpcode() != X86ISD::CMP || !EFLAGS.hasOneUse() ||
!X86::isZeroNode(EFLAGS.getOperand(1)) ||
!EFLAGS.getOperand(0).getValueType().isInteger())
return SDValue();
SDValue Z = EFLAGS.getOperand(0);
EVT ZVT = Z.getValueType();
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
if (ConstantX) {
// 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
// fake operands:
// 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
// -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) ||
(!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) {
SDValue Zero = DAG.getConstant(0, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
SDValue(Neg.getNode(), 1));
}
// cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
// with fake operands:
// 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
// -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) ||
(!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) {
SDValue One = DAG.getConstant(1, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
Cmp1.getValue(1));
}
}
// (cmp Z, 1) sets the carry flag if Z is 0.
SDValue One = DAG.getConstant(1, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
// Add the flags type for ADC/SBB nodes.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
// X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
// X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
if (CC == X86::COND_NE)
return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));
// X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
// X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
}
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
bool IsSub = N->getOpcode() == ISD::SUB;
SDValue X = N->getOperand(0);
SDValue Y = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG))
return ADCOrSBB;
// Commute and try again (negate the result for subtracts).
if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, Y, X, DAG)) {
if (IsSub)
ADCOrSBB =
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), ADCOrSBB);
return ADCOrSBB;
}
return SDValue();
}
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
// Only handle test patterns.
if (!isNullConstant(N->getOperand(1)))
return SDValue();
// If we have a CMP of a truncated binop, see if we can make a smaller binop
// and use its flags directly.
// TODO: Maybe we should try promoting compares that only use the zero flag
// first if we can prove the upper bits with computeKnownBits?
SDLoc dl(N);
SDValue Op = N->getOperand(0);
EVT VT = Op.getValueType();
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
// a TEST instruction later.
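// e.g. on i32, (srl x, 5) == 0 iff (x & 0xFFFFFFE0) == 0, and the shl case
// similarly maps to a low-bits mask.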
if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
onlyZeroFlagUsed(SDValue(N, 0))) {
unsigned BitWidth = VT.getSizeInBits();
const APInt &ShAmt = Op.getConstantOperandAPInt(1);
if (ShAmt.ult(BitWidth)) { // Avoid undefined shifts.
unsigned MaskBits = BitWidth - ShAmt.getZExtValue();
APInt Mask = Op.getOpcode() == ISD::SRL
? APInt::getHighBitsSet(BitWidth, MaskBits)
: APInt::getLowBitsSet(BitWidth, MaskBits);
if (Mask.isSignedIntN(32)) {
Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
DAG.getConstant(Mask, dl, VT));
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, VT));
}
}
}
// Peek through any zero-extend if we're only testing for a zero result.
if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT.getScalarSizeInBits() >= 8 &&
DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src,
DAG.getConstant(0, dl, SrcVT));
}
// Look for a truncate.
if (Op.getOpcode() != ISD::TRUNCATE)
return SDValue();
SDValue Trunc = Op;
Op = Op.getOperand(0);
// See if we can compare with zero against the truncation source,
// which should help using the Z flag from many ops. Only do this for
// i32 truncated op to prevent partial-reg compares of promoted ops.
EVT OpVT = Op.getValueType();
APInt UpperBits =
APInt::getBitsSetFrom(OpVT.getSizeInBits(), VT.getSizeInBits());
if (OpVT == MVT::i32 && DAG.MaskedValueIsZero(Op, UpperBits) &&
onlyZeroFlagUsed(SDValue(N, 0))) {
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, OpVT));
}
// After this the truncate and arithmetic op must have a single use.
if (!Trunc.hasOneUse() || !Op.hasOneUse())
return SDValue();
unsigned NewOpc;
switch (Op.getOpcode()) {
default: return SDValue();
case ISD::AND:
// Skip AND with a constant. We have special handling for AND with an
// immediate during isel to generate test instructions.
if (isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
NewOpc = X86ISD::AND;
break;
case ISD::OR: NewOpc = X86ISD::OR; break;
case ISD::XOR: NewOpc = X86ISD::XOR; break;
case ISD::ADD:
// If the carry or overflow flag is used, we can't truncate.
if (needCarryOrOverflowFlag(SDValue(N, 0)))
return SDValue();
NewOpc = X86ISD::ADD;
break;
case ISD::SUB:
// If the carry or overflow flag is used, we can't truncate.
if (needCarryOrOverflowFlag(SDValue(N, 0)))
return SDValue();
NewOpc = X86ISD::SUB;
break;
}
// We found an op we can narrow. Truncate its inputs.
SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));
// Use a X86 specific opcode to avoid DAG combine messing with it.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1);
// For AND, keep a CMP so that we can match the test pattern.
if (NewOpc == X86ISD::AND)
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, VT));
// Return the flags.
return Op.getValue(1);
}
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
"Expected X86ISD::ADD or X86ISD::SUB");
SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
MVT VT = LHS.getSimpleValueType();
bool IsSub = X86ISD::SUB == N->getOpcode();
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
// If we don't use the flag result, simplify back to a generic ADD/SUB.
if (!N->hasAnyUseOfValue(1)) {
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
}
// Fold any similar generic ADD/SUB opcodes to reuse this node.
auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
SDValue Ops[] = {N0, N1};
SDVTList VTs = DAG.getVTList(N->getValueType(0));
if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
SDValue Op(N, 0);
if (Negate)
Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
DCI.CombineTo(GenericAddSub, Op);
}
};
MatchGeneric(LHS, RHS, false);
MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
// TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
// EFLAGS result doesn't change.
return combineAddOrSubToADCOrSBB(IsSub, DL, VT, LHS, RHS, DAG,
/*ZeroSecondOpOnly*/ true);
}
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue BorrowIn = N->getOperand(2);
if (SDValue Flags = combineCarryThroughADD(BorrowIn, DAG)) {
MVT VT = N->getSimpleValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, LHS, RHS, Flags);
}
// Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry)
// iff the flag result is dead.
if (LHS.getOpcode() == ISD::SUB && isNullConstant(RHS) &&
!N->hasAnyUseOfValue(1))
return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), LHS.getOperand(0),
LHS.getOperand(1), BorrowIn);
return SDValue();
}
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
auto *LHSC = dyn_cast<ConstantSDNode>(LHS);
auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
// Canonicalize constant to RHS.
if (LHSC && !RHSC)
return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), RHS, LHS,
CarryIn);
// If the LHS and RHS of the ADC node are zero, then it can't overflow and
// the result is either zero or one (depending on the input carry bit).
// Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
if (LHSC && RHSC && LHSC->isZero() && RHSC->isZero() &&
// We don't have a good way to replace an EFLAGS use, so only do this when
// dead right now.
SDValue(N, 1).use_empty()) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
SDValue Res1 = DAG.getNode(
ISD::AND, DL, VT,
DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), CarryIn),
DAG.getConstant(1, DL, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
// Fold ADC(C1,C2,Carry) -> ADC(0,C1+C2,Carry)
// iff the flag result is dead.
// TODO: Allow flag result if C1+C2 doesn't signed/unsigned overflow.
if (LHSC && RHSC && !LHSC->isZero() && !N->hasAnyUseOfValue(1)) {
SDLoc DL(N);
APInt Sum = LHSC->getAPIntValue() + RHSC->getAPIntValue();
return DAG.getNode(X86ISD::ADC, DL, N->getVTList(),
DAG.getConstant(0, DL, LHS.getValueType()),
DAG.getConstant(Sum, DL, LHS.getValueType()), CarryIn);
}
if (SDValue Flags = combineCarryThroughADD(CarryIn, DAG)) {
MVT VT = N->getSimpleValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, LHS, RHS, Flags);
}
// Fold ADC(ADD(X,Y),0,Carry) -> ADC(X,Y,Carry)
// iff the flag result is dead.
if (LHS.getOpcode() == ISD::ADD && RHSC && RHSC->isZero() &&
!N->hasAnyUseOfValue(1))
return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), LHS.getOperand(0),
LHS.getOperand(1), CarryIn);
return SDValue();
}
static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
const SDLoc &DL, EVT VT,
const X86Subtarget &Subtarget) {
// Example of pattern we try to detect:
// t := (v8i32 mul (sext (v8i16 x0), (sext (v8i16 x1))))
// (add (build_vector (extract_elt t, 0),
// (extract_elt t, 2),
// (extract_elt t, 4),
// (extract_elt t, 6)),
// (build_vector (extract_elt t, 1),
// (extract_elt t, 3),
// (extract_elt t, 5),
// (extract_elt t, 7)))
if (!Subtarget.hasSSE2())
return SDValue();
if (Op0.getOpcode() != ISD::BUILD_VECTOR ||
Op1.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||
VT.getVectorNumElements() < 4 ||
!isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
// Check if one of Op0,Op1 is of the form:
// (build_vector (extract_elt Mul, 0),
// (extract_elt Mul, 2),
// (extract_elt Mul, 4),
// ...
// the other is of the form:
// (build_vector (extract_elt Mul, 1),
// (extract_elt Mul, 3),
// (extract_elt Mul, 5),
// ...
// and identify Mul.
SDValue Mul;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; i += 2) {
SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i),
Op0H = Op0->getOperand(i + 1), Op1H = Op1->getOperand(i + 1);
// TODO: Be more tolerant to undefs.
if (Op0L.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1L.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op0H.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1H.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
auto *Const0L = dyn_cast<ConstantSDNode>(Op0L->getOperand(1));
auto *Const1L = dyn_cast<ConstantSDNode>(Op1L->getOperand(1));
auto *Const0H = dyn_cast<ConstantSDNode>(Op0H->getOperand(1));
auto *Const1H = dyn_cast<ConstantSDNode>(Op1H->getOperand(1));
if (!Const0L || !Const1L || !Const0H || !Const1H)
return SDValue();
unsigned Idx0L = Const0L->getZExtValue(), Idx1L = Const1L->getZExtValue(),
Idx0H = Const0H->getZExtValue(), Idx1H = Const1H->getZExtValue();
// Commutativity of mul allows factors of a product to reorder.
if (Idx0L > Idx1L)
std::swap(Idx0L, Idx1L);
if (Idx0H > Idx1H)
std::swap(Idx0H, Idx1H);
// Commutativity of add allows pairs of factors to reorder.
if (Idx0L > Idx0H) {
std::swap(Idx0L, Idx0H);
std::swap(Idx1L, Idx1H);
}
if (Idx0L != 2 * i || Idx1L != 2 * i + 1 || Idx0H != 2 * i + 2 ||
Idx1H != 2 * i + 3)
return SDValue();
if (!Mul) {
// First time an extract_elt's source vector is visited. Must be a MUL
// with twice as many vector elements as the BUILD_VECTOR.
// Both extracts must be from same MUL.
Mul = Op0L->getOperand(0);
if (Mul->getOpcode() != ISD::MUL ||
Mul.getValueType().getVectorNumElements() != 2 * e)
return SDValue();
}
// Check that the extract is from the same MUL previously seen.
if (Mul != Op0L->getOperand(0) || Mul != Op1L->getOperand(0) ||
Mul != Op0H->getOperand(0) || Mul != Op1H->getOperand(0))
return SDValue();
}
// Check if the Mul source can be safely shrunk.
ShrinkMode Mode;
if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) ||
Mode == ShrinkMode::MULU16)
return SDValue();
EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
VT.getVectorNumElements() * 2);
SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(0));
SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(1));
auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
EVT InVT = Ops[0].getValueType();
assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements() / 2);
return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
};
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { N0, N1 }, PMADDBuilder);
}
// Attempt to turn this pattern into PMADDWD.
// (add (mul (sext (build_vector)), (sext (build_vector))),
// (mul (sext (build_vector)), (sext (build_vector)))
static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
const SDLoc &DL, EVT VT,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2())
return SDValue();
if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
return SDValue();
if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||
VT.getVectorNumElements() < 4 ||
!isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
// All inputs need to be sign extends.
// TODO: Support ZERO_EXTEND from known positive?
if (N00.getOpcode() != ISD::SIGN_EXTEND ||
N01.getOpcode() != ISD::SIGN_EXTEND ||
N10.getOpcode() != ISD::SIGN_EXTEND ||
N11.getOpcode() != ISD::SIGN_EXTEND)
return SDValue();
// Peek through the extends.
N00 = N00.getOperand(0);
N01 = N01.getOperand(0);
N10 = N10.getOperand(0);
N11 = N11.getOperand(0);
// Must be extending from vXi16.
EVT InVT = N00.getValueType();
if (InVT.getVectorElementType() != MVT::i16 || N01.getValueType() != InVT ||
N10.getValueType() != InVT || N11.getValueType() != InVT)
return SDValue();
// All inputs should be build_vectors.
if (N00.getOpcode() != ISD::BUILD_VECTOR ||
N01.getOpcode() != ISD::BUILD_VECTOR ||
N10.getOpcode() != ISD::BUILD_VECTOR ||
N11.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// For each element, we need to ensure we have an odd element from one vector
// multiplied by the odd element of another vector and the even element from
// one of the same vectors being multiplied by the even element from the
// other vector. So we need to make sure for each element i, this operator
// is being performed:
// A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1]
SDValue In0, In1;
for (unsigned i = 0; i != N00.getNumOperands(); ++i) {
SDValue N00Elt = N00.getOperand(i);
SDValue N01Elt = N01.getOperand(i);
SDValue N10Elt = N10.getOperand(i);
SDValue N11Elt = N11.getOperand(i);
// TODO: Be more tolerant to undefs.
if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
return SDValue();
unsigned IdxN00 = ConstN00Elt->getZExtValue();
unsigned IdxN01 = ConstN01Elt->getZExtValue();
unsigned IdxN10 = ConstN10Elt->getZExtValue();
unsigned IdxN11 = ConstN11Elt->getZExtValue();
// Add is commutative so indices can be reordered.
if (IdxN00 > IdxN10) {
std::swap(IdxN00, IdxN10);
std::swap(IdxN01, IdxN11);
}
// N0 indices must be the even element. N1 indices must be the next odd element.
if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
return SDValue();
SDValue N00In = N00Elt.getOperand(0);
SDValue N01In = N01Elt.getOperand(0);
SDValue N10In = N10Elt.getOperand(0);
SDValue N11In = N11Elt.getOperand(0);
// First time we find an input capture it.
if (!In0) {
In0 = N00In;
In1 = N01In;
// The input vectors must be at least as wide as the output.
// If they are larger than the output, we extract subvector below.
if (In0.getValueSizeInBits() < VT.getSizeInBits() ||
In1.getValueSizeInBits() < VT.getSizeInBits())
return SDValue();
}
// Mul is commutative so the input vectors can be in any order.
// Canonicalize to make the compares easier.
if (In0 != N00In)
std::swap(N00In, N01In);
if (In0 != N10In)
std::swap(N10In, N11In);
if (In0 != N00In || In1 != N01In || In0 != N10In || In1 != N11In)
return SDValue();
}
auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
EVT OpVT = Ops[0].getValueType();
assert(OpVT.getScalarType() == MVT::i16 &&
"Unexpected scalar element type");
assert(OpVT == Ops[1].getValueType() && "Operands' types mismatch");
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
OpVT.getVectorNumElements() / 2);
return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
};
// If the output is narrower than an input, extract the low part of the input
// vector.
EVT OutVT16 = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
VT.getVectorNumElements() * 2);
if (OutVT16.bitsLT(In0.getValueType())) {
In0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In0,
DAG.getIntPtrConstant(0, DL));
}
if (OutVT16.bitsLT(In1.getValueType())) {
In1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In1,
DAG.getIntPtrConstant(0, DL));
}
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },
PMADDBuilder);
}
// ADD(VPMADDWD(X,Y),VPMADDWD(Z,W)) -> VPMADDWD(SHUFFLE(X,Z), SHUFFLE(Y,W))
// If the upper element in each pair of both VPMADDWD operands is zero then we
// can merge the operand elements and use the implicit add of VPMADDWD.
// TODO: Add support for VPMADDUBSW (which isn't commutable).
static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1,
const SDLoc &DL, EVT VT) {
if (N0.getOpcode() != N1.getOpcode() || N0.getOpcode() != X86ISD::VPMADDWD)
return SDValue();
// TODO: Add 256/512-bit support once VPMADDWD combines with shuffles.
if (VT.getSizeInBits() > 128)
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
MVT OpVT = N0.getOperand(0).getSimpleValueType();
APInt DemandedBits = APInt::getAllOnes(OpVT.getScalarSizeInBits());
APInt DemandedHiElts = APInt::getSplat(2 * NumElts, APInt(2, 2));
bool Op0HiZero =
DAG.MaskedValueIsZero(N0.getOperand(0), DemandedBits, DemandedHiElts) ||
DAG.MaskedValueIsZero(N0.getOperand(1), DemandedBits, DemandedHiElts);
bool Op1HiZero =
DAG.MaskedValueIsZero(N1.getOperand(0), DemandedBits, DemandedHiElts) ||
DAG.MaskedValueIsZero(N1.getOperand(1), DemandedBits, DemandedHiElts);
// TODO: Check for zero lower elements once we have actual codegen that
// creates them.
if (!Op0HiZero || !Op1HiZero)
return SDValue();
// Create a shuffle mask packing the lower elements from each VPMADDWD.
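// e.g. for a v4i32 result with v8i16 operands: Mask = {0,8, 2,10, 4,12, 6,14},
// interleaving the low element of each i16 pair from both operands.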
SmallVector<int> Mask;
for (int i = 0; i != (int)NumElts; ++i) {
Mask.push_back(2 * i);
Mask.push_back(2 * (i + NumElts));
}
SDValue LHS =
DAG.getVectorShuffle(OpVT, DL, N0.getOperand(0), N1.getOperand(0), Mask);
SDValue RHS =
DAG.getVectorShuffle(OpVT, DL, N0.getOperand(1), N1.getOperand(1), Mask);
return DAG.getNode(X86ISD::VPMADDWD, DL, VT, LHS, RHS);
}
/// CMOV of constants requires materializing constant operands in registers.
/// Try to fold those constants into an 'add' instruction to reduce instruction
/// count. We do this with CMOV rather than the generic 'select' because there
/// are
/// earlier folds that may be used to turn select-of-constants into logic hacks.
static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// If an operand is zero, add-of-0 gets simplified away, so that's clearly
// better because we eliminate 1-2 instructions. This transform is still
// an improvement without zero operands because we trade 2 move constants and
// 1 add for 2 adds (LEA) as long as the constants can be represented as
// immediate asm operands (fit in 32-bits).
auto isSuitableCmov = [](SDValue V) {
if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
return false;
if (!isa<ConstantSDNode>(V.getOperand(0)) ||
!isa<ConstantSDNode>(V.getOperand(1)))
return false;
return isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)) ||
(V.getConstantOperandAPInt(0).isSignedIntN(32) &&
V.getConstantOperandAPInt(1).isSignedIntN(32));
};
// Match an appropriate CMOV as the first operand of the add.
SDValue Cmov = N->getOperand(0);
SDValue OtherOp = N->getOperand(1);
if (!isSuitableCmov(Cmov))
std::swap(Cmov, OtherOp);
if (!isSuitableCmov(Cmov))
return SDValue();
// Don't remove a load folding opportunity for the add. That would neutralize
// any improvements from removing constant materializations.
if (X86::mayFoldLoad(OtherOp, Subtarget))
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FalseOp = Cmov.getOperand(0);
SDValue TrueOp = Cmov.getOperand(1);
// We will push the add through the select, but we can potentially do better
// if we know there is another add in the sequence and this is pointer math.
// In that case, we can absorb an add into the trailing memory op and avoid
// a 3-operand LEA which is likely slower than a 2-operand LEA.
// TODO: If target has "slow3OpsLEA", do this even without the trailing memop?
if (OtherOp.getOpcode() == ISD::ADD && OtherOp.hasOneUse() &&
!isa<ConstantSDNode>(OtherOp.getOperand(0)) &&
all_of(N->uses(), [&](SDNode *Use) {
auto *MemNode = dyn_cast<MemSDNode>(Use);
return MemNode && MemNode->getBasePtr().getNode() == N;
})) {
// add (cmov C1, C2), add (X, Y) --> add (cmov (add X, C1), (add X, C2)), Y
// TODO: We are arbitrarily choosing op0 as the 1st piece of the sum, but
// it is possible that choosing op1 might be better.
SDValue X = OtherOp.getOperand(0), Y = OtherOp.getOperand(1);
FalseOp = DAG.getNode(ISD::ADD, DL, VT, X, FalseOp);
TrueOp = DAG.getNode(ISD::ADD, DL, VT, X, TrueOp);
Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp,
Cmov.getOperand(2), Cmov.getOperand(3));
return DAG.getNode(ISD::ADD, DL, VT, Cmov, Y);
}
// add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);
TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);
return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),
Cmov.getOperand(3));
}
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDLoc DL(N);
if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG, Subtarget))
return Select;
if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget))
return MAdd;
if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, DL, VT, Subtarget))
return MAdd;
if (SDValue MAdd = combineAddOfPMADDWD(DAG, Op0, Op1, DL, VT))
return MAdd;
// Try to synthesize horizontal adds from adds of shuffles.
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
return V;
// If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into
// (sub Y, (sext (vXi1 X))).
// FIXME: We have the (sub Y, (zext (vXi1 X))) -> (add (sext (vXi1 X)), Y) in
// generic DAG combine without a legal type check, but adding this there
// caused regressions.
if (VT.isVector()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Op0.getOpcode() == ISD::ZERO_EXTEND &&
Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
TLI.isTypeLegal(Op0.getOperand(0).getValueType())) {
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt);
}
if (Op1.getOpcode() == ISD::ZERO_EXTEND &&
Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
TLI.isTypeLegal(Op1.getOperand(0).getValueType())) {
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt);
}
}
// Fold ADD(ADC(Y,0,W),X) -> ADC(X,Y,W)
if (Op0.getOpcode() == X86ISD::ADC && Op0->hasOneUse() &&
X86::isZeroNode(Op0.getOperand(1))) {
assert(!Op0->hasAnyUseOfValue(1) && "Overflow bit in use");
return DAG.getNode(X86ISD::ADC, SDLoc(Op0), Op0->getVTList(), Op1,
Op0.getOperand(0), Op0.getOperand(2));
}
return combineAddOrSubToADCOrSBB(N, DAG);
}
// Try to fold (sub Y, cmovns X, -X) -> (add Y, cmovns -X, X) if the cmov
// condition comes from the subtract node that produced -X. This matches the
// cmov expansion for absolute value. By swapping the operands we convert abs
// to nabs.
static SDValue combineSubABS(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != X86ISD::CMOV || !N1.hasOneUse())
return SDValue();
X86::CondCode CC = (X86::CondCode)N1.getConstantOperandVal(2);
if (CC != X86::COND_S && CC != X86::COND_NS)
return SDValue();
// Condition should come from a negate operation.
SDValue Cond = N1.getOperand(3);
if (Cond.getOpcode() != X86ISD::SUB || !isNullConstant(Cond.getOperand(0)))
return SDValue();
assert(Cond.getResNo() == 1 && "Unexpected result number");
// Get the X and -X from the negate.
SDValue NegX = Cond.getValue(0);
SDValue X = Cond.getOperand(1);
SDValue FalseOp = N1.getOperand(0);
SDValue TrueOp = N1.getOperand(1);
// Cmov operands should be X and NegX. Order doesn't matter.
if (!(TrueOp == X && FalseOp == NegX) && !(TrueOp == NegX && FalseOp == X))
return SDValue();
// Build a new CMOV with the operands swapped.
SDLoc DL(N);
MVT VT = N->getSimpleValueType(0);
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp,
N1.getOperand(2), Cond);
// Convert sub to add.
return DAG.getNode(ISD::ADD, DL, VT, N0, Cmov);
}
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// TODO: Add NoOpaque handling to isConstantIntBuildVectorOrConstantInt.
auto IsNonOpaqueConstant = [&](SDValue Op) {
if (SDNode *C = DAG.isConstantIntBuildVectorOrConstantInt(Op)) {
if (auto *Cst = dyn_cast<ConstantSDNode>(C))
return !Cst->isOpaque();
return true;
}
return false;
};
// X86 can't encode an immediate LHS of a sub. See if we can push the
// negation into a preceding instruction. If the RHS of the sub is a XOR with
// one use and a constant, invert the immediate, saving one register.
// sub(C1, xor(X, C2)) -> add(xor(X, ~C2), C1+1)
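// This holds because C1 - (X ^ C2) = C1 + ~(X ^ C2) + 1
//                                  = (X ^ ~C2) + (C1 + 1).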
if (Op1.getOpcode() == ISD::XOR && IsNonOpaqueConstant(Op0) &&
IsNonOpaqueConstant(Op1.getOperand(1)) && Op1->hasOneUse()) {
SDLoc DL(N);
EVT VT = Op0.getValueType();
SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0),
DAG.getNOT(SDLoc(Op1), Op1.getOperand(1), VT));
SDValue NewAdd =
DAG.getNode(ISD::ADD, DL, VT, Op0, DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, NewXor, NewAdd);
}
if (SDValue V = combineSubABS(N, DAG))
return V;
// Try to synthesize horizontal subs from subs of shuffles.
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
return V;
// Fold SUB(X,ADC(Y,0,W)) -> SBB(X,Y,W)
if (Op1.getOpcode() == X86ISD::ADC && Op1->hasOneUse() &&
X86::isZeroNode(Op1.getOperand(1))) {
assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use");
return DAG.getNode(X86ISD::SBB, SDLoc(Op1), Op1->getVTList(), Op0,
Op1.getOperand(0), Op1.getOperand(2));
}
// Fold SUB(X,SBB(Y,Z,W)) -> SUB(ADC(X,Z,W),Y)
// Don't fold to ADC(0,0,W)/SETCC_CARRY pattern which will prevent more folds.
if (Op1.getOpcode() == X86ISD::SBB && Op1->hasOneUse() &&
!(X86::isZeroNode(Op0) && X86::isZeroNode(Op1.getOperand(1)))) {
assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use");
SDValue ADC = DAG.getNode(X86ISD::ADC, SDLoc(Op1), Op1->getVTList(), Op0,
Op1.getOperand(1), Op1.getOperand(2));
return DAG.getNode(ISD::SUB, SDLoc(N), Op0.getValueType(), ADC.getValue(0),
Op1.getOperand(0));
}
return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
return DAG.getConstant(-1, DL, VT);
if (N->getOpcode() == X86ISD::PCMPGT)
return DAG.getConstant(0, DL, VT);
}
return SDValue();
}
/// Helper that combines an array of subvector ops as if they were the operands
/// of an ISD::CONCAT_VECTORS node, but may have come from another source (e.g.
/// ISD::INSERT_SUBVECTOR). The ops are assumed to be of the same type.
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
ArrayRef<SDValue> Ops, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
if (llvm::all_of(Ops, [](SDValue Op) {
return ISD::isBuildVectorAllZeros(Op.getNode());
}))
return getZeroVector(VT, Subtarget, DAG, DL);
SDValue Op0 = Ops[0];
bool IsSplat = llvm::all_equal(Ops);
// Repeated subvectors.
if (IsSplat &&
(VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) {
// If this broadcast is inserted into both halves, use a larger broadcast.
if (Op0.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
// If this simple subvector or scalar/subvector broadcast_load is inserted
// into both halves, use a larger broadcast_load. Update other uses to use
// an extracted subvector.
if (ISD::isNormalLoad(Op0.getNode()) ||
Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
auto *Mem = cast<MemSDNode>(Op0);
unsigned Opc = Op0.getOpcode() == X86ISD::VBROADCAST_LOAD
? X86ISD::VBROADCAST_LOAD
: X86ISD::SUBV_BROADCAST_LOAD;
if (SDValue BcastLd =
getBROADCAST_LOAD(Opc, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) {
SDValue BcastSrc =
extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits());
DAG.ReplaceAllUsesOfValueWith(Op0, BcastSrc);
return BcastLd;
}
}
// concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
(Subtarget.hasAVX2() ||
X86::mayFoldLoadIntoBroadcastFromMem(Op0.getOperand(0),
VT.getScalarType(), Subtarget)))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
Op0.getOperand(0),
DAG.getIntPtrConstant(0, DL)));
// concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Subtarget.hasAVX2() ||
(EltSizeInBits >= 32 &&
X86::mayFoldLoad(Op0.getOperand(0), Subtarget))) &&
Op0.getOperand(0).getValueType() == VT.getScalarType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
// concat_vectors(extract_subvector(broadcast(x)),
// extract_subvector(broadcast(x))) -> broadcast(x)
if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Op0.getOperand(0).getValueType() == VT) {
if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST ||
Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD)
return Op0.getOperand(0);
}
}
// concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128.
// Only handle concat of subvector high halves, which vperm2x128 is best at.
// TODO: This should go in combineX86ShufflesRecursively eventually.
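// e.g. concat(extract_subvector(v0, N0/2), extract_subvector(v1, N1/2)) with
// 256-bit sources becomes vperm2x128(v0, v1, 0x31), selecting the high
// 128-bit lane of each source.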
if (VT.is256BitVector() && Ops.size() == 2) {
SDValue Src0 = peekThroughBitcasts(Ops[0]);
SDValue Src1 = peekThroughBitcasts(Ops[1]);
if (Src0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Src1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
EVT SrcVT0 = Src0.getOperand(0).getValueType();
EVT SrcVT1 = Src1.getOperand(0).getValueType();
unsigned NumSrcElts0 = SrcVT0.getVectorNumElements();
unsigned NumSrcElts1 = SrcVT1.getVectorNumElements();
if (SrcVT0.is256BitVector() && SrcVT1.is256BitVector() &&
Src0.getConstantOperandAPInt(1) == (NumSrcElts0 / 2) &&
Src1.getConstantOperandAPInt(1) == (NumSrcElts1 / 2)) {
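// Immediate 0x31 selects the upper 128-bit half of each source, giving
// { hi(Src0), hi(Src1) }.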
return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
DAG.getBitcast(VT, Src0.getOperand(0)),
DAG.getBitcast(VT, Src1.getOperand(0)),
DAG.getTargetConstant(0x31, DL, MVT::i8));
}
}
}
// Repeated opcode.
// TODO - combineX86ShufflesRecursively should handle shuffle concatenation
// but it currently struggles with different vector widths.
if (llvm::all_of(Ops, [Op0](SDValue Op) {
return Op.getOpcode() == Op0.getOpcode();
})) {
auto ConcatSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
SmallVector<SDValue> Subs;
for (SDValue SubOp : SubOps)
Subs.push_back(SubOp.getOperand(I));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
};
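// IsConcatFree checks that operand 'Op' of every subvector is an in-order
// EXTRACT_SUBVECTOR from a full-width VT vector (chunk I extracted at offset
// I * NumSubElts), so re-concatenating those operands should be cheap.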
auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
SDValue Sub = SubOps[I].getOperand(Op);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
if (Sub.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Sub.getOperand(0).getValueType() != VT ||
Sub.getConstantOperandAPInt(1) != (I * NumSubElts))
return false;
}
return true;
};
unsigned NumOps = Ops.size();
switch (Op0.getOpcode()) {
case X86ISD::VBROADCAST: {
if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
return Op.getOperand(0).getValueType().is128BitVector();
})) {
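// concat(broadcast(x),broadcast(y)) of 128-bit sources: splat element 0
// within each 128-bit lane of the concatenated sources instead.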
if (VT == MVT::v4f64 || VT == MVT::v4i64)
return DAG.getNode(X86ISD::UNPCKL, DL, VT,
ConcatSubOperand(VT, Ops, 0),
ConcatSubOperand(VT, Ops, 0));
// TODO: Add pseudo v8i32 PSHUFD handling to AVX1Only targets.
if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))
return DAG.getNode(VT == MVT::v8f32 ? X86ISD::VPERMILPI
: X86ISD::PSHUFD,
DL, VT, ConcatSubOperand(VT, Ops, 0),
getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, DL, DAG));
}
break;
}
case X86ISD::MOVDDUP:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP: {
if (!IsSplat)
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0));
break;
}
case X86ISD::SHUFP: {
// Add SHUFPD support if/when necessary.
if (!IsSplat && VT.getScalarType() == MVT::f32 &&
llvm::all_of(Ops, [Op0](SDValue Op) {
return Op.getOperand(2) == Op0.getOperand(2);
})) {
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0),
ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
}
break;
}
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFD:
if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
Subtarget.hasInt256() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
}
[[fallthrough]];
case X86ISD::VPERMILPI:
if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
Op0.getOperand(1) == Ops[1].getOperand(1)) {
SDValue Res = DAG.getBitcast(MVT::v8f32, ConcatSubOperand(VT, Ops, 0));
Res = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Res,
Op0.getOperand(1));
return DAG.getBitcast(VT, Res);
}
if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
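// Merge the two 2-bit v2f64 selector immediates into the 4-bit selector
// for the concatenated v4f64 permute.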
uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
uint64_t Idx = ((Idx1 & 3) << 2) | (Idx0 & 3);
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0),
DAG.getTargetConstant(Idx, DL, MVT::i8));
}
break;
case X86ISD::PSHUFB:
if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useBWIRegs()))) {
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0),
ConcatSubOperand(VT, Ops, 1));
}
break;
case X86ISD::VPERMV3:
if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
MVT OpVT = Op0.getSimpleValueType();
int NumSrcElts = OpVT.getVectorNumElements();
SmallVector<int, 64> ConcatMask;
for (unsigned i = 0; i != NumOps; ++i) {
SmallVector<int, 64> SubMask;
SmallVector<SDValue, 2> SubOps;
if (!getTargetShuffleMask(Ops[i].getNode(), OpVT, false, SubOps,
SubMask))
break;
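// Remap each subvector's two-source mask into the concatenated two-source
// mask: indices into the second source are shifted past the widened first
// source, then everything is offset by the subvector's position.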
for (int M : SubMask) {
if (0 <= M) {
M += M < NumSrcElts ? 0 : NumSrcElts;
M += i * NumSrcElts;
}
ConcatMask.push_back(M);
}
}
if (ConcatMask.size() == (NumOps * NumSrcElts)) {
SDValue Src0 = concatSubVectors(Ops[0].getOperand(0),
Ops[1].getOperand(0), DAG, DL);
SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
Ops[1].getOperand(2), DAG, DL);
MVT IntMaskSVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
}
}
break;
case X86ISD::VSHLI:
case X86ISD::VSRLI:
// Special case: SHL/SRL of an AVX1 v4i64 by 32 bits can lower as a shuffle.
// TODO: Move this to LowerShiftByScalarImmediate?
if (VT == MVT::v4i64 && !Subtarget.hasInt256() &&
llvm::all_of(Ops, [](SDValue Op) {
return Op.getConstantOperandAPInt(1) == 32;
})) {
SDValue Res = DAG.getBitcast(MVT::v8i32, ConcatSubOperand(VT, Ops, 0));
SDValue Zero = getZeroVector(MVT::v8i32, Subtarget, DAG, DL);
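// A 64-bit shift by 32 moves one 32-bit half into the other and zeroes the
// remaining half, so model it as a v8i32 shuffle interleaving with zero
// (indices >= 8 select elements of the zero vector).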
if (Op0.getOpcode() == X86ISD::VSHLI) {
Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero,
{8, 0, 8, 2, 8, 4, 8, 6});
} else {
Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero,
{1, 8, 3, 8, 5, 8, 7, 8});
}
return DAG.getBitcast(VT, Res);
}
[[fallthrough]];
case X86ISD::VSRAI:
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA:
if (((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs() &&
(EltSizeInBits >= 32 || Subtarget.useBWIRegs()))) &&
llvm::all_of(Ops, [Op0](SDValue Op) {
return Op0.getOperand(1) == Op.getOperand(1);
})) {
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
}
break;
case X86ISD::VPERMI:
case X86ISD::VROTLI:
case X86ISD::VROTRI:
if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
llvm::all_of(Ops, [Op0](SDValue Op) {
return Op0.getOperand(1) == Op.getOperand(1);
})) {
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
}
break;
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case X86ISD::ANDNP:
// TODO: Add 256-bit support.
if (!IsSplat && VT.is512BitVector()) {
MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
NumOps * SrcVT.getVectorNumElements());
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(SrcVT, Ops, 0),
ConcatSubOperand(SrcVT, Ops, 1));
}
break;
case X86ISD::GF2P8AFFINEQB:
if (!IsSplat &&
(VT.is256BitVector() ||
(VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
llvm::all_of(Ops, [Op0](SDValue Op) {
return Op0.getOperand(2) == Op.getOperand(2);
})) {
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0),
ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
}
break;
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
case X86ISD::FHSUB:
case X86ISD::PACKSS:
case X86ISD::PACKUS:
if (!IsSplat && VT.is256BitVector() &&
(VT.isFloatingPoint() || Subtarget.hasInt256())) {
MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
NumOps * SrcVT.getVectorNumElements());
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(SrcVT, Ops, 0),
ConcatSubOperand(SrcVT, Ops, 1));
}
break;
case X86ISD::PALIGNR:
if (!IsSplat &&
((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useBWIRegs())) &&
llvm::all_of(Ops, [Op0](SDValue Op) {
return Op0.getOperand(2) == Op.getOperand(2);
})) {
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0),
ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
}
break;
case ISD::VSELECT:
case X86ISD::BLENDV:
if (!IsSplat && VT.is256BitVector() && Ops.size() == 2 &&
(VT.getScalarSizeInBits() >= 32 || Subtarget.hasInt256()) &&
IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) {
EVT SelVT = Ops[0].getOperand(0).getValueType();
SelVT = SelVT.getDoubleNumVectorElementsVT(*DAG.getContext());
if (DAG.getTargetLoweringInfo().isTypeLegal(SelVT))
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(SelVT.getSimpleVT(), Ops, 0),
ConcatSubOperand(VT, Ops, 1),
ConcatSubOperand(VT, Ops, 2));
}
break;
}
}
// Fold subvector loads into one.
// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
unsigned Fast;
const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*FirstLd->getMemOperand(), &Fast) &&
Fast) {
if (SDValue Ld =
EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
return Ld;
}
}
// Attempt to fold target constant loads.
if (all_of(Ops, [](SDValue Op) { return getTargetConstantFromNode(Op); })) {
SmallVector<APInt> EltBits;
APInt UndefElts = APInt::getNullValue(VT.getVectorNumElements());
for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
APInt OpUndefElts;
SmallVector<APInt> OpEltBits;
if (!getTargetConstantBitsFromNode(Ops[I], EltSizeInBits, OpUndefElts,
OpEltBits, true, false))
break;
EltBits.append(OpEltBits);
UndefElts.insertBits(OpUndefElts, I * OpUndefElts.getBitWidth());
}
if (EltBits.size() == VT.getVectorNumElements())
return getConstVector(EltBits, UndefElts, VT, DAG, DL);
}
return SDValue();
}
static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
EVT SrcVT = N->getOperand(0).getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Don't do anything for i1 vectors.
if (VT.getVectorElementType() == MVT::i1)
return SDValue();
if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) {
SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG,
DCI, Subtarget))
return R;
}
return SDValue();
}
static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
MVT OpVT = N->getSimpleValueType(0);
bool IsI1Vector = OpVT.getVectorElementType() == MVT::i1;
SDLoc dl(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
uint64_t IdxVal = N->getConstantOperandVal(2);
MVT SubVecVT = SubVec.getSimpleValueType();
if (Vec.isUndef() && SubVec.isUndef())
return DAG.getUNDEF(OpVT);
// Inserting an undef/zero subvector into a zero/undef vector yields a zero vector.
if ((Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())) &&
(SubVec.isUndef() || ISD::isBuildVectorAllZeros(SubVec.getNode())))
return getZeroVector(OpVT, Subtarget, DAG, dl);
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
// If we're inserting into a zero vector and then into a larger zero vector,
// just insert into the larger zero vector directly.
if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
uint64_t Idx2Val = SubVec.getConstantOperandVal(2);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
getZeroVector(OpVT, Subtarget, DAG, dl),
SubVec.getOperand(1),
DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
}
// If we're inserting into a zero vector and our input was extracted from an
// insert into a zero vector of the same type, and the extraction was at
// least as large as the original insertion, just insert the original
// subvector into a zero vector.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
isNullConstant(SubVec.getOperand(1)) &&
SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue Ins = SubVec.getOperand(0);
if (isNullConstant(Ins.getOperand(2)) &&
ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
Ins.getOperand(1).getValueSizeInBits().getFixedValue() <=
SubVecVT.getFixedSizeInBits())
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
getZeroVector(OpVT, Subtarget, DAG, dl),
Ins.getOperand(1), N->getOperand(2));
}
}
// Stop here if this is an i1 vector.
if (IsI1Vector)
return SDValue();
// If this is an insert of an extract, combine to a shuffle. Don't do this
// if the insert or extract can be represented with a subregister operation.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
SubVec.getOperand(0).getSimpleValueType() == OpVT &&
(IdxVal != 0 ||
!(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
int ExtIdxVal = SubVec.getConstantOperandVal(1);
if (ExtIdxVal != 0) {
int VecNumElts = OpVT.getVectorNumElements();
int SubVecNumElts = SubVecVT.getVectorNumElements();
SmallVector<int, 64> Mask(VecNumElts);
// First create an identity shuffle mask.
for (int i = 0; i != VecNumElts; ++i)
Mask[i] = i;
// Now insert the extracted portion.
for (int i = 0; i != SubVecNumElts; ++i)
Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
}
}
// Match concat_vector style patterns.
SmallVector<SDValue, 2> SubVectorOps;
if (collectConcatOps(N, SubVectorOps, DAG)) {
if (SDValue Fold =
combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget))
return Fold;
// If we're inserting all zeros into the upper half, change this to
// a concat with zero. We will match this to a move
// with implicit upper bit zeroing during isel.
// We do this here because we don't want combineConcatVectorOps to
// create INSERT_SUBVECTOR from CONCAT_VECTORS.
if (SubVectorOps.size() == 2 &&
ISD::isBuildVectorAllZeros(SubVectorOps[1].getNode()))
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
getZeroVector(OpVT, Subtarget, DAG, dl),
SubVectorOps[0], DAG.getIntPtrConstant(0, dl));
}
// If this is a broadcast insert into an upper undef, use a larger broadcast.
if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));
// If this is a broadcast load inserted into an upper undef, use a larger
// broadcast load.
if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() &&
SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) {
auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
SDVTList Tys = DAG.getVTList(OpVT, MVT::Other);
SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
MemIntr->getMemoryVT(),
MemIntr->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
return BcastLd;
}
// If we're splatting the lower half subvector of a full vector load into the
// upper half, attempt to create a subvector broadcast.
if (IdxVal == (OpVT.getVectorNumElements() / 2) && SubVec.hasOneUse() &&
Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits())) {
auto *VecLd = dyn_cast<LoadSDNode>(Vec);
auto *SubLd = dyn_cast<LoadSDNode>(SubVec);
if (VecLd && SubLd &&
DAG.areNonVolatileConsecutiveLoads(SubLd, VecLd,
SubVec.getValueSizeInBits() / 8, 0))
return getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, dl, OpVT, SubVecVT,
SubLd, 0, DAG);
}
return SDValue();
}
/// If we are extracting a subvector of a vector select and the select condition
/// is composed of concatenated vectors, try to narrow the select width. This
/// is a common pattern for AVX1 integer code because 256-bit selects may be
/// legal, but there is almost no integer math/logic available for 256-bit.
/// This function should only be called with legal types (otherwise, the calls
/// to get simple value types will assert).
static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
SDValue Sel = Ext->getOperand(0);
SmallVector<SDValue, 4> CatOps;
if (Sel.getOpcode() != ISD::VSELECT ||
!collectConcatOps(Sel.getOperand(0).getNode(), CatOps, DAG))
return SDValue();
// Note: We assume simple value types because this should only be called with
// legal operations/types.
// TODO: This can be extended to handle extraction to 256-bits.
MVT VT = Ext->getSimpleValueType(0);
if (!VT.is128BitVector())
return SDValue();
MVT SelCondVT = Sel.getOperand(0).getSimpleValueType();
if (!SelCondVT.is256BitVector() && !SelCondVT.is512BitVector())
return SDValue();
MVT WideVT = Ext->getOperand(0).getSimpleValueType();
MVT SelVT = Sel.getSimpleValueType();
assert((SelVT.is256BitVector() || SelVT.is512BitVector()) &&
"Unexpected vector type with legal operations");
unsigned SelElts = SelVT.getVectorNumElements();
unsigned CastedElts = WideVT.getVectorNumElements();
unsigned ExtIdx = Ext->getConstantOperandVal(1);
if (SelElts % CastedElts == 0) {
// The select has the same or more (narrower) elements than the extract
// operand. The extraction index gets scaled by that factor.
ExtIdx *= (SelElts / CastedElts);
} else if (CastedElts % SelElts == 0) {
// The select has fewer (wider) elements than the extract operand. Make sure
// that the extraction index can be divided evenly.
unsigned IndexDivisor = CastedElts / SelElts;
if (ExtIdx % IndexDivisor != 0)
return SDValue();
ExtIdx /= IndexDivisor;
} else {
llvm_unreachable("Element counts of simple vector types are not divisible?");
}
unsigned NarrowingFactor = WideVT.getSizeInBits() / VT.getSizeInBits();
unsigned NarrowElts = SelElts / NarrowingFactor;
MVT NarrowSelVT = MVT::getVectorVT(SelVT.getVectorElementType(), NarrowElts);
SDLoc DL(Ext);
SDValue ExtCond = extract128BitVector(Sel.getOperand(0), ExtIdx, DAG, DL);
SDValue ExtT = extract128BitVector(Sel.getOperand(1), ExtIdx, DAG, DL);
SDValue ExtF = extract128BitVector(Sel.getOperand(2), ExtIdx, DAG, DL);
SDValue NarrowSel = DAG.getSelect(DL, NarrowSelVT, ExtCond, ExtT, ExtF);
return DAG.getBitcast(VT, NarrowSel);
}
static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// For AVX1 only, if we are extracting from a 256-bit and+not (which will
// eventually get combined/lowered into ANDNP) with a concatenated operand,
// split the 'and' into 128-bit ops to avoid the concatenate and extract.
// We let generic combining take over from there to simplify the
// insert/extract and 'not'.
// This pattern emerges during AVX1 legalization. We handle it before lowering
// to avoid complications like splitting constant vector loads.
// Capture the original wide type in the likely case that we need to bitcast
// back to this type.
if (!N->getValueType(0).isSimple())
return SDValue();
MVT VT = N->getSimpleValueType(0);
SDValue InVec = N->getOperand(0);
unsigned IdxVal = N->getConstantOperandVal(1);
SDValue InVecBC = peekThroughBitcasts(InVec);
EVT InVecVT = InVec.getValueType();
unsigned SizeInBits = VT.getSizeInBits();
unsigned InSizeInBits = InVecVT.getSizeInBits();
unsigned NumSubElts = VT.getVectorNumElements();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
TLI.isTypeLegal(InVecVT) &&
InSizeInBits == 256 && InVecBC.getOpcode() == ISD::AND) {
auto isConcatenatedNot = [](SDValue V) {
V = peekThroughBitcasts(V);
if (!isBitwiseNot(V))
return false;
SDValue NotOp = V->getOperand(0);
return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS;
};
if (isConcatenatedNot(InVecBC.getOperand(0)) ||
isConcatenatedNot(InVecBC.getOperand(1))) {
// extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1
SDValue Concat = splitVectorIntBinary(InVecBC, DAG);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
DAG.getBitcast(InVecVT, Concat), N->getOperand(1));
}
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue V = narrowExtractedVectorSelect(N, DAG))
return V;
if (ISD::isBuildVectorAllZeros(InVec.getNode()))
return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
if (ISD::isBuildVectorAllOnes(InVec.getNode())) {
if (VT.getScalarType() == MVT::i1)
return DAG.getConstant(1, SDLoc(N), VT);
return getOnesVector(VT, DAG, SDLoc(N));
}
if (InVec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getBuildVector(VT, SDLoc(N),
InVec->ops().slice(IdxVal, NumSubElts));
// If we are extracting from an insert into a larger vector, replace with a
// smaller insert, provided the extraction covers at least the original
// inserted subvector. Don't do this for i1 vectors.
// TODO: Relax the matching indices requirement?
if (VT.getVectorElementType() != MVT::i1 &&
InVec.getOpcode() == ISD::INSERT_SUBVECTOR && InVec.hasOneUse() &&
IdxVal == InVec.getConstantOperandVal(2) &&
InVec.getOperand(1).getValueSizeInBits() <= SizeInBits) {
SDLoc DL(N);
SDValue NewExt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT,
InVec.getOperand(0), N->getOperand(1));
unsigned NewIdxVal = InVec.getConstantOperandVal(2) - IdxVal;
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, NewExt,
InVec.getOperand(1),
DAG.getVectorIdxConstant(NewIdxVal, DL));
}
// If we're extracting an upper subvector from a broadcast, we should just
// extract the lowest subvector instead, which should allow
// SimplifyDemandedVectorElts to do more simplifications.
if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST ||
InVec.getOpcode() == X86ISD::VBROADCAST_LOAD ||
DAG.isSplatValue(InVec, /*AllowUndefs*/ false)))
return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits);
// If we're extracting a broadcasted subvector, just use the lowest subvector.
if (IdxVal != 0 && InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT)
return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits);
// Attempt to extract from the source of a shuffle vector.
if ((InSizeInBits % SizeInBits) == 0 && (IdxVal % NumSubElts) == 0) {
SmallVector<int, 32> ShuffleMask;
SmallVector<int, 32> ScaledMask;
SmallVector<SDValue, 2> ShuffleInputs;
unsigned NumSubVecs = InSizeInBits / SizeInBits;
// Decode the shuffle mask and scale it so it shuffles whole subvectors.
if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) &&
scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) {
unsigned SubVecIdx = IdxVal / NumSubElts;
if (ScaledMask[SubVecIdx] == SM_SentinelUndef)
return DAG.getUNDEF(VT);
if (ScaledMask[SubVecIdx] == SM_SentinelZero)
return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs];
if (Src.getValueSizeInBits() == InSizeInBits) {
unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs;
unsigned SrcEltIdx = SrcSubVecIdx * NumSubElts;
return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG,
SDLoc(N), SizeInBits);
}
}
}
// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
unsigned InOpcode = InVec.getOpcode();
if (InVec.hasOneUse()) {
if (IdxVal == 0 && VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
// v2f64 CVTDQ2PD(v4i32).
if (InOpcode == ISD::SINT_TO_FP &&
InVec.getOperand(0).getValueType() == MVT::v4i32) {
return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0));
}
// v2f64 CVTUDQ2PD(v4i32).
if (InOpcode == ISD::UINT_TO_FP && Subtarget.hasVLX() &&
InVec.getOperand(0).getValueType() == MVT::v4i32) {
return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0));
}
// v2f64 CVTPS2PD(v4f32).
if (InOpcode == ISD::FP_EXTEND &&
InVec.getOperand(0).getValueType() == MVT::v4f32) {
return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), VT, InVec.getOperand(0));
}
}
if (IdxVal == 0 &&
(InOpcode == ISD::ANY_EXTEND ||
InOpcode == ISD::ANY_EXTEND_VECTOR_INREG ||
InOpcode == ISD::ZERO_EXTEND ||
InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
InOpcode == ISD::SIGN_EXTEND ||
InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
(SizeInBits == 128 || SizeInBits == 256) &&
InVec.getOperand(0).getValueSizeInBits() >= SizeInBits) {
SDLoc DL(N);
SDValue Ext = InVec.getOperand(0);
if (Ext.getValueSizeInBits() > SizeInBits)
Ext = extractSubVector(Ext, 0, DAG, DL, SizeInBits);
unsigned ExtOp = DAG.getOpcode_EXTEND_VECTOR_INREG(InOpcode);
return DAG.getNode(ExtOp, DL, VT, Ext);
}
if (IdxVal == 0 && InOpcode == ISD::VSELECT &&
InVec.getOperand(0).getValueType().is256BitVector() &&
InVec.getOperand(1).getValueType().is256BitVector() &&
InVec.getOperand(2).getValueType().is256BitVector()) {
SDLoc DL(N);
SDValue Ext0 = extractSubVector(InVec.getOperand(0), 0, DAG, DL, 128);
SDValue Ext1 = extractSubVector(InVec.getOperand(1), 0, DAG, DL, 128);
SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
}
if (IdxVal == 0 && InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
(VT.is128BitVector() || VT.is256BitVector())) {
SDLoc DL(N);
SDValue InVecSrc = InVec.getOperand(0);
unsigned Scale = InVecSrc.getValueSizeInBits() / InSizeInBits;
SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits);
return DAG.getNode(InOpcode, DL, VT, Ext);
}
if (InOpcode == X86ISD::MOVDDUP &&
(VT.is128BitVector() || VT.is256BitVector())) {
SDLoc DL(N);
SDValue Ext0 =
extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
return DAG.getNode(InOpcode, DL, VT, Ext0);
}
}
// Always split vXi64 logical shifts where we're extracting the upper 32-bits
// as this is very likely to fold into a shuffle/truncation.
if ((InOpcode == X86ISD::VSHLI || InOpcode == X86ISD::VSRLI) &&
InVecVT.getScalarSizeInBits() == 64 &&
InVec.getConstantOperandAPInt(1) == 32) {
SDLoc DL(N);
SDValue Ext =
extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
return DAG.getNode(InOpcode, DL, VT, Ext, InVec.getOperand(1));
}
return SDValue();
}
static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
SDLoc DL(N);
// If this is a scalar to vector to v1i1 from an AND with 1, bypass the and.
// This occurs frequently in our masked scalar intrinsic code and our
// floating point select lowering with AVX512.
// TODO: SimplifyDemandedBits instead?
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
if (C->getAPIntValue().isOne())
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
Src.getOperand(0));
// Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
if (C->isZero())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
Src.getOperand(1));
// Reduce v2i64 to v4i32 if we don't need the upper bits or they are known to be zero.
// TODO: Move to DAGCombine/SimplifyDemandedBits?
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) {
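// IsExt64 matches an i64 whose upper 32 bits are either unused (any-extend
// or ext-load) or known to be zero (zero-extend, zext-load, or via known
// bits), so the scalar_to_vector can be built from a 32-bit element instead.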
auto IsExt64 = [&DAG](SDValue Op, bool IsZeroExt) {
if (Op.getValueType() != MVT::i64)
return SDValue();
unsigned Opc = IsZeroExt ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND;
if (Op.getOpcode() == Opc &&
Op.getOperand(0).getScalarValueSizeInBits() <= 32)
return Op.getOperand(0);
unsigned Ext = IsZeroExt ? ISD::ZEXTLOAD : ISD::EXTLOAD;
if (auto *Ld = dyn_cast<LoadSDNode>(Op))
if (Ld->getExtensionType() == Ext &&
Ld->getMemoryVT().getScalarSizeInBits() <= 32)
return Op;
if (IsZeroExt) {
KnownBits Known = DAG.computeKnownBits(Op);
if (!Known.isConstant() && Known.countMinLeadingZeros() >= 32)
return Op;
}
return SDValue();
};
if (SDValue AnyExt = IsExt64(peekThroughOneUseBitcasts(Src), false))
return DAG.getBitcast(
VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
DAG.getAnyExtOrTrunc(AnyExt, DL, MVT::i32)));
if (SDValue ZeroExt = IsExt64(peekThroughOneUseBitcasts(Src), true))
return DAG.getBitcast(
VT,
DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v4i32,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
DAG.getZExtOrTrunc(ZeroExt, DL, MVT::i32))));
}
// Combine (v2i64 (scalar_to_vector (i64 (bitconvert (mmx))))) to MOVQ2DQ.
if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST &&
Src.getOperand(0).getValueType() == MVT::x86mmx)
return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, Src.getOperand(0));
// See if we're broadcasting the scalar value, in which case just reuse that.
// Ensure the broadcast uses the exact same SDValue, not just the same SDNode.
if (VT.getScalarType() == Src.getValueType())
for (SDNode *User : Src->uses())
if (User->getOpcode() == X86ISD::VBROADCAST &&
Src == User->getOperand(0)) {
unsigned SizeInBits = VT.getFixedSizeInBits();
unsigned BroadcastSizeInBits =
User->getValueSizeInBits(0).getFixedValue();
if (BroadcastSizeInBits == SizeInBits)
return SDValue(User, 0);
if (BroadcastSizeInBits > SizeInBits)
return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
// TODO: Handle BroadcastSizeInBits < SizeInBits when we have test
// coverage.
}
return SDValue();
}
// Simplify PMULDQ and PMULUDQ operations.
static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Canonicalize constant to RHS.
if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
!DAG.isConstantIntBuildVectorOrConstantInt(RHS))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);
// Multiply by zero.
// Don't return RHS as it may contain UNDEFs.
if (ISD::isBuildVectorAllZeros(RHS.getNode()))
return DAG.getConstant(0, SDLoc(N), N->getValueType(0));
// PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(64), DCI))
return SDValue(N, 0);
// If the input is an extend_invec and the SimplifyDemandedBits call didn't
// convert it to any_extend_invec, due to the LegalOperations check, do the
// conversion directly to a vector shuffle manually. This exposes combine
// opportunities missed by combineEXTEND_VECTOR_INREG not calling
// combineX86ShufflesRecursively on SSE4.1 targets.
// FIXME: This is basically a hack around several other issues related to
// ANY_EXTEND_VECTOR_INREG.
if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() &&
(LHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
LHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
LHS.getOperand(0).getValueType() == MVT::v4i32) {
SDLoc dl(N);
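// A {0, undef, 1, undef} shuffle places the two v4i32 source elements into
// the low halves of the i64 lanes, which is all PMULDQ/PMULUDQ reads.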
LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0),
LHS.getOperand(0), { 0, -1, 1, -1 });
LHS = DAG.getBitcast(MVT::v2i64, LHS);
return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
}
if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() &&
(RHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
RHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
RHS.getOperand(0).getValueType() == MVT::v4i32) {
SDLoc dl(N);
RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0),
RHS.getOperand(0), { 0, -1, 1, -1 });
RHS = DAG.getBitcast(MVT::v2i64, RHS);
return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
}
return SDValue();
}
// Simplify VPMADDUBSW/VPMADDWD operations.
static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Multiply by zero.
// Don't return LHS/RHS as it may contain UNDEFs.
if (ISD::isBuildVectorAllZeros(LHS.getNode()) ||
ISD::isBuildVectorAllZeros(RHS.getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
}
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
unsigned Opcode = N->getOpcode();
unsigned InOpcode = In.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(N);
// Try to merge vector loads and extend_inreg to an extload.
if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
In.hasOneUse()) {
auto *Ld = cast<LoadSDNode>(In);
if (Ld->isSimple()) {
MVT SVT = In.getSimpleValueType().getVectorElementType();
ISD::LoadExtType Ext = Opcode == ISD::SIGN_EXTEND_VECTOR_INREG
? ISD::SEXTLOAD
: ISD::ZEXTLOAD;
EVT MemVT = VT.changeVectorElementType(SVT);
if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
SDValue Load = DAG.getExtLoad(
Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
return Load;
}
}
}
// Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
if (Opcode == InOpcode)
return DAG.getNode(Opcode, DL, VT, In.getOperand(0));
// Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
// -> EXTEND_VECTOR_INREG(X).
// TODO: Handle non-zero subvector indices.
if (InOpcode == ISD::EXTRACT_SUBVECTOR && In.getConstantOperandVal(1) == 0 &&
In.getOperand(0).getOpcode() == DAG.getOpcode_EXTEND(Opcode) &&
In.getOperand(0).getOperand(0).getValueSizeInBits() ==
In.getValueSizeInBits())
return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0));
// Fold EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0).
// TODO: Move to DAGCombine?
if (!DCI.isBeforeLegalizeOps() && Opcode == ISD::ZERO_EXTEND_VECTOR_INREG &&
In.getOpcode() == ISD::BUILD_VECTOR && In.hasOneUse() &&
In.getValueSizeInBits() == VT.getSizeInBits()) {
unsigned NumElts = VT.getVectorNumElements();
unsigned Scale = VT.getScalarSizeInBits() / In.getScalarValueSizeInBits();
EVT EltVT = In.getOperand(0).getValueType();
SmallVector<SDValue> Elts(Scale * NumElts, DAG.getConstant(0, DL, EltVT));
for (unsigned I = 0; I != NumElts; ++I)
Elts[I * Scale] = In.getOperand(I);
return DAG.getBitcast(VT, DAG.getBuildVector(In.getValueType(), DL, Elts));
}
// Attempt to combine as a shuffle on SSE41+ targets.
if ((Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) &&
Subtarget.hasSSE41()) {
SDValue Op(N, 0);
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
return SDValue();
}
static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
}
// Optimize (fp16_to_fp (fp_to_fp16 X)) to VCVTPS2PH followed by VCVTPH2PS.
// Done as a combine because the lowering for fp16_to_fp and fp_to_fp16 produces
// extra instructions between the conversions due to going to scalar and back.
static SDValue combineFP16_TO_FP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Subtarget.useSoftFloat() || !Subtarget.hasF16C())
return SDValue();
if (N->getOperand(0).getOpcode() != ISD::FP_TO_FP16)
return SDValue();
if (N->getValueType(0) != MVT::f32 ||
N->getOperand(0).getOperand(0).getValueType() != MVT::f32)
return SDValue();
SDLoc dl(N);
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32,
N->getOperand(0).getOperand(0));
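// Rounding-control immediate 4 selects the embedded round-to-nearest-even
// mode for CVTPS2PH.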
Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res,
DAG.getTargetConstant(4, dl, MVT::i32));
Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
DAG.getIntPtrConstant(0, dl));
}
static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
if (Subtarget.hasFP16())
return SDValue();
bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::f16)
return SDValue();
if (VT.getVectorElementType() != MVT::f32 &&
VT.getVectorElementType() != MVT::f64)
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 1 || !isPowerOf2_32(NumElts))
return SDValue();
SDLoc dl(N);
// Convert the input to vXi16.
EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
Src = DAG.getBitcast(IntVT, Src);
// Widen to at least 8 input elements.
if (NumElts < 8) {
unsigned NumConcats = 8 / NumElts;
SDValue Fill = NumElts == 4 ? DAG.getUNDEF(IntVT)
: DAG.getConstant(0, dl, IntVT);
SmallVector<SDValue, 4> Ops(NumConcats, Fill);
Ops[0] = Src;
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, Ops);
}
// Destination is vXf32 with at least 4 elements.
EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32,
std::max(4U, NumElts));
SDValue Cvt, Chain;
if (IsStrict) {
Cvt = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {CvtVT, MVT::Other},
{N->getOperand(0), Src});
Chain = Cvt.getValue(1);
} else {
Cvt = DAG.getNode(X86ISD::CVTPH2PS, dl, CvtVT, Src);
}
if (NumElts < 4) {
assert(NumElts == 2 && "Unexpected size");
Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Cvt,
DAG.getIntPtrConstant(0, dl));
}
if (IsStrict) {
// Extend to the original VT if necessary.
if (Cvt.getValueType() != VT) {
Cvt = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {VT, MVT::Other},
{Chain, Cvt});
Chain = Cvt.getValue(1);
}
return DAG.getMergeValues({Cvt, Chain}, dl);
}
// Extend to the original VT if necessary.
return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt);
}
// Try to find a larger VBROADCAST_LOAD/SUBV_BROADCAST_LOAD that we can extract
// from. Limit this to cases where the loads have the same input chain and the
// output chains are unused. This avoids any memory ordering issues.
static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD ||
N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) &&
"Unknown broadcast load type");
// Only do this if the chain result is unused.
if (N->hasAnyUseOfValue(1))
return SDValue();
auto *MemIntrin = cast<MemIntrinsicSDNode>(N);
SDValue Ptr = MemIntrin->getBasePtr();
SDValue Chain = MemIntrin->getChain();
EVT VT = N->getSimpleValueType(0);
EVT MemVT = MemIntrin->getMemoryVT();
// Look at other users of our base pointer and try to find a wider broadcast.
// The input chain and the size of the memory VT must match.
for (SDNode *User : Ptr->uses())
if (User != N && User->getOpcode() == N->getOpcode() &&
cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
MemVT.getSizeInBits() &&
!User->hasAnyUseOfValue(1) &&
User->getValueSizeInBits(0).getFixedValue() > VT.getFixedSizeInBits()) {
SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
VT.getSizeInBits());
Extract = DAG.getBitcast(VT, Extract);
return DCI.CombineTo(N, Extract, SDValue(User, 1));
}
return SDValue();
}
static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
if (Subtarget.hasFP16())
return SDValue();
bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
SrcVT.getVectorElementType() != MVT::f32)
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 1 || !isPowerOf2_32(NumElts))
return SDValue();
SDLoc dl(N);
// Widen to at least 4 input elements.
if (NumElts < 4)
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getConstantFP(0.0, dl, SrcVT));
// Destination is vXi16 with at least 8 elements.
EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
std::max(8U, NumElts));
SDValue Cvt, Chain;
SDValue Rnd = DAG.getTargetConstant(4, dl, MVT::i32);
if (IsStrict) {
Cvt = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {CvtVT, MVT::Other},
{N->getOperand(0), Src, Rnd});
Chain = Cvt.getValue(1);
} else {
Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src, Rnd);
}
// Extract down to real number of elements.
if (NumElts < 8) {
EVT IntVT = VT.changeVectorElementTypeToInteger();
Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, IntVT, Cvt,
DAG.getIntPtrConstant(0, dl));
}
Cvt = DAG.getBitcast(VT, Cvt);
if (IsStrict)
return DAG.getMergeValues({Cvt, Chain}, dl);
return Cvt;
}
static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) {
SDValue Src = N->getOperand(0);
// Turn MOVDQ2Q+simple_load into an mmx load.
if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
LoadSDNode *LN = cast<LoadSDNode>(Src.getNode());
if (LN->isSimple()) {
SDValue NewLd = DAG.getLoad(MVT::x86mmx, SDLoc(N), LN->getChain(),
LN->getBasePtr(),
LN->getPointerInfo(),
LN->getOriginalAlign(),
LN->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), NewLd.getValue(1));
return NewLd;
}
}
return SDValue();
}
static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBits), DCI))
return SDValue(N, 0);
return SDValue();
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::SCALAR_TO_VECTOR:
return combineScalarToVector(N, DAG);
case ISD::EXTRACT_VECTOR_ELT:
case X86ISD::PEXTRW:
case X86ISD::PEXTRB:
return combineExtractVectorElt(N, DAG, DCI, Subtarget);
case ISD::CONCAT_VECTORS:
return combineCONCAT_VECTORS(N, DAG, DCI, Subtarget);
case ISD::INSERT_SUBVECTOR:
return combineINSERT_SUBVECTOR(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_SUBVECTOR:
return combineEXTRACT_SUBVECTOR(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
case X86ISD::CMP: return combineCMP(N, DAG);
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
case X86ISD::ADD:
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
case X86ISD::SBB: return combineSBB(N, DAG);
case X86ISD::ADC: return combineADC(N, DAG, DCI);
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
case ISD::SHL: return combineShiftLeft(N, DAG);
case ISD::SRA: return combineShiftRightArithmetic(N, DAG, Subtarget);
case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI, Subtarget);
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
case X86ISD::BEXTR:
case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
case X86ISD::VEXTRACT_STORE:
return combineVEXTRACT_STORE(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
return combineSIntToFP(N, DAG, DCI, Subtarget);
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
case X86ISD::VFCMULC:
case X86ISD::VFMULC: return combineFMulcFCMulc(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI);
case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return combineFOr(N, DAG, DCI, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
case X86ISD::CVTP2SI:
case X86ISD::CVTP2UI:
case X86ISD::STRICT_CVTTP2SI:
case X86ISD::CVTTP2SI:
case X86ISD::STRICT_CVTTP2UI:
case X86ISD::CVTTP2UI:
return combineCVTP2I_CVTTP2I(N, DAG, DCI);
case X86ISD::STRICT_CVTPH2PS:
case X86ISD::CVTPH2PS: return combineCVTPH2PS(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
return combineEXTEND_VECTOR_INREG(N, DAG, DCI, Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, DCI, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::PACKSS:
case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
case X86ISD::FHSUB: return combineVectorHADDSUB(N, DAG, DCI, Subtarget);
case X86ISD::VSHL:
case X86ISD::VSRA:
case X86ISD::VSRL:
return combineVectorShiftVar(N, DAG, DCI, Subtarget);
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
return combineVectorShiftImm(N, DAG, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT:
case X86ISD::PINSRB:
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
case X86ISD::EXTRQI:
case X86ISD::INSERTQI:
case X86ISD::VALIGN:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
case X86ISD::BLENDI:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::MOVSH:
case X86ISD::VBROADCAST:
case X86ISD::VPPERM:
case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VPERMIL2:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
case X86ISD::SHUF128:
case X86ISD::VZEXT_MOVL:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI, Subtarget);
case X86ISD::FMADD_RND:
case X86ISD::FMSUB:
case X86ISD::STRICT_FMSUB:
case X86ISD::FMSUB_RND:
case X86ISD::FNMADD:
case X86ISD::STRICT_FNMADD:
case X86ISD::FNMADD_RND:
case X86ISD::FNMSUB:
case X86ISD::STRICT_FNMSUB:
case X86ISD::FNMSUB_RND:
case ISD::FMA:
case ISD::STRICT_FMA: return combineFMA(N, DAG, DCI, Subtarget);
case X86ISD::FMADDSUB_RND:
case X86ISD::FMSUBADD_RND:
case X86ISD::FMADDSUB:
case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI);
case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
case X86ISD::MGATHER:
case X86ISD::MSCATTER:
return combineX86GatherScatter(N, DAG, DCI, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI);
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
case X86ISD::VPMADDUBSW:
case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI);
case X86ISD::KSHIFTL:
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget);
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget);
case X86ISD::VBROADCAST_LOAD:
case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI);
case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG);
case X86ISD::PDEP: return combinePDEP(N, DAG, DCI);
}
return SDValue();
}
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
// There are no vXi8 shifts.
if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
return false;
// TODO: Almost no 8-bit ops are desirable because they have no actual
// size/speed advantages vs. 32-bit ops, but they do have a major
// potential disadvantage by causing partial register stalls.
//
// 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and
// we have specializations to turn 32-bit multiply/shl into LEA or other ops.
// Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
// check for a constant operand to the multiply.
if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
return false;
// i16 instruction encodings are longer and some i16 instructions are slow,
// so those are not desirable.
if (VT == MVT::i16) {
switch (Opc) {
default:
break;
case ISD::LOAD:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::SUB:
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return false;
}
}
// Any legal type not explicitly accounted for above here is desirable.
return true;
}
SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
SDValue Value, SDValue Addr,
SelectionDAG &DAG) const {
const Module *M = DAG.getMachineFunction().getMMI().getModule();
Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
if (IsCFProtectionSupported) {
// In case control-flow branch protection is enabled, we need to add a
// notrack prefix to the indirect branch.
// In order to do that we create an NT_BRIND SDNode.
// Upon ISEL, the pattern will convert it to a jmp with the NoTrack prefix.
return DAG.getNode(X86ISD::NT_BRIND, dl, MVT::Other, Value, Addr);
}
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
}
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
isa<ConstantSDNode>(Op.getOperand(1));
// i16 is legal, but undesirable since i16 instruction encodings are longer
// and some i16 instructions are slow.
// 8-bit multiply-by-constant can usually be expanded to something cheaper
// using LEA and/or other ALU ops.
if (VT != MVT::i16 && !Is8BitMulByConstant)
return false;
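// A load feeding Op whose only user is a store back to the same address can
// be selected as a read-modify-write memory op; promoting the type would
// lose that fold.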
auto IsFoldableRMW = [](SDValue Load, SDValue Op) {
if (!Op.hasOneUse())
return false;
SDNode *User = *Op->use_begin();
if (!ISD::isNormalStore(User))
return false;
auto *Ld = cast<LoadSDNode>(Load);
auto *St = cast<StoreSDNode>(User);
return Ld->getBasePtr() == St->getBasePtr();
};
auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
return false;
if (!Op.hasOneUse())
return false;
SDNode *User = *Op->use_begin();
if (User->getOpcode() != ISD::ATOMIC_STORE)
return false;
auto *Ld = cast<AtomicSDNode>(Load);
auto *St = cast<AtomicSDNode>(User);
return Ld->getBasePtr() == St->getBasePtr();
};
bool Commute = false;
switch (Op.getOpcode()) {
default: return false;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
if (X86::mayFoldLoad(N0, Subtarget) && IsFoldableRMW(N0, Op))
return false;
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
Commute = true;
[[fallthrough]];
case ISD::SUB: {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
// Avoid disabling potential load folding opportunities.
if (X86::mayFoldLoad(N1, Subtarget) &&
(!Commute || !isa<ConstantSDNode>(N0) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
return false;
if (X86::mayFoldLoad(N0, Subtarget) &&
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
if (IsFoldableAtomicRMW(N0, Op) ||
(Commute && IsFoldableAtomicRMW(N1, Op)))
return false;
}
}
PVT = MVT::i32;
return true;
}
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
// Helper to match a string against a sequence of pieces separated by whitespace.
static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace.
for (StringRef Piece : Pieces) {
if (!S.startswith(Piece)) // Check if the piece matches.
return false;
S = S.substr(Piece.size());
StringRef::size_type Pos = S.find_first_not_of(" \t");
if (Pos == 0) // We matched a prefix.
return false;
S = S.substr(Pos);
}
return S.empty();
}
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
if (llvm::is_contained(AsmPieces, "~{cc}") &&
llvm::is_contained(AsmPieces, "~{flags}") &&
llvm::is_contained(AsmPieces, "~{fpsr}")) {
if (AsmPieces.size() == 3)
return true;
else if (llvm::is_contained(AsmPieces, "~{dirflag}"))
return true;
}
}
return false;
}
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
const std::string &AsmStr = IA->getAsmString();
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
// FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical
// ops instead of emitting the bswap asm. For now, we don't support 486 or
// lower so don't worry about this.
// bswap $0
if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
return IntrinsicLowering::LowerToByteSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
(matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
AsmPieces.clear();
StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
AsmPieces.clear();
StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
if (CI->getType()->isIntegerTy(64)) {
InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
if (Constraints.size() >= 2 &&
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
}
return false;
}
static X86::CondCode parseConstraintCode(llvm::StringRef Constraint) {
X86::CondCode Cond = StringSwitch<X86::CondCode>(Constraint)
.Case("{@cca}", X86::COND_A)
.Case("{@ccae}", X86::COND_AE)
.Case("{@ccb}", X86::COND_B)
.Case("{@ccbe}", X86::COND_BE)
.Case("{@ccc}", X86::COND_B)
.Case("{@cce}", X86::COND_E)
.Case("{@ccz}", X86::COND_E)
.Case("{@ccg}", X86::COND_G)
.Case("{@ccge}", X86::COND_GE)
.Case("{@ccl}", X86::COND_L)
.Case("{@ccle}", X86::COND_LE)
.Case("{@ccna}", X86::COND_BE)
.Case("{@ccnae}", X86::COND_B)
.Case("{@ccnb}", X86::COND_AE)
.Case("{@ccnbe}", X86::COND_A)
.Case("{@ccnc}", X86::COND_AE)
.Case("{@ccne}", X86::COND_NE)
.Case("{@ccnz}", X86::COND_NE)
.Case("{@ccng}", X86::COND_LE)
.Case("{@ccnge}", X86::COND_L)
.Case("{@ccnl}", X86::COND_GE)
.Case("{@ccnle}", X86::COND_G)
.Case("{@ccno}", X86::COND_NO)
.Case("{@ccnp}", X86::COND_NP)
.Case("{@ccns}", X86::COND_NS)
.Case("{@cco}", X86::COND_O)
.Case("{@ccp}", X86::COND_P)
.Case("{@ccs}", X86::COND_S)
.Default(X86::COND_INVALID);
return Cond;
}
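// Sketch of how these codes arise (assuming the usual GCC/Clang flag-output
// syntax): C source such as
//   int below;
//   asm("cmpl %2, %1" : "=@ccb"(below) : "r"(a), "r"(b));
// reaches this function with the constraint code "{@ccb}", which maps to
// X86::COND_B and is materialized from EFLAGS in
// LowerAsmOutputForConstraint below.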
/// Given a constraint letter, return the type of constraint for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'R':
case 'q':
case 'Q':
case 'f':
case 't':
case 'u':
case 'y':
case 'x':
case 'v':
case 'l':
case 'k': // AVX512 masking registers.
return C_RegisterClass;
case 'a':
case 'b':
case 'c':
case 'd':
case 'S':
case 'D':
case 'A':
return C_Register;
case 'I':
case 'J':
case 'K':
case 'N':
case 'G':
case 'L':
case 'M':
return C_Immediate;
case 'C':
case 'e':
case 'Z':
return C_Other;
default:
break;
}
}
else if (Constraint.size() == 2) {
switch (Constraint[0]) {
default:
break;
case 'Y':
switch (Constraint[1]) {
default:
break;
case 'z':
return C_Register;
case 'i':
case 'm':
case 'k':
case 't':
case '2':
return C_RegisterClass;
}
}
} else if (parseConstraintCode(Constraint) != X86::COND_INVALID)
return C_Other;
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
X86TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
[[fallthrough]];
case 'R':
case 'q':
case 'Q':
case 'a':
case 'b':
case 'c':
case 'd':
case 'S':
case 'D':
case 'A':
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_SpecificReg;
break;
case 'f':
case 't':
case 'u':
if (type->isFloatingPointTy())
weight = CW_SpecificReg;
break;
case 'y':
if (type->isX86_MMXTy() && Subtarget.hasMMX())
weight = CW_SpecificReg;
break;
case 'Y':
if (StringRef(constraint).size() != 2)
break;
switch (constraint[1]) {
default:
return CW_Invalid;
// XMM0
case 'z':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
return CW_SpecificReg;
return CW_Invalid;
// Conditional OpMask regs (AVX512)
case 'k':
if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
return CW_Register;
return CW_Invalid;
// Any MMX reg
case 'm':
if (type->isX86_MMXTy() && Subtarget.hasMMX())
return weight;
return CW_Invalid;
// Any SSE reg when ISA >= SSE2, same as 'x'
case 'i':
case 't':
case '2':
if (!Subtarget.hasSSE2())
return CW_Invalid;
break;
}
break;
case 'v':
if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
weight = CW_Register;
[[fallthrough]];
case 'x':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()))
weight = CW_Register;
break;
case 'k':
// Enable conditional vector operations using %k<#> registers.
if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
weight = CW_Register;
break;
case 'I':
if (auto *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (C->getZExtValue() <= 31)
weight = CW_Constant;
}
break;
case 'J':
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 63)
weight = CW_Constant;
}
break;
case 'K':
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
weight = CW_Constant;
}
break;
case 'L':
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
weight = CW_Constant;
}
break;
case 'M':
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 3)
weight = CW_Constant;
}
break;
case 'N':
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xff)
weight = CW_Constant;
}
break;
case 'G':
case 'C':
if (isa<ConstantFP>(CallOperandVal)) {
weight = CW_Constant;
}
break;
case 'e':
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80000000LL) &&
(C->getSExtValue() <= 0x7fffffffLL))
weight = CW_Constant;
}
break;
case 'Z':
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xffffffff)
weight = CW_Constant;
}
break;
}
return weight;
}
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *X86TargetLowering::
LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
if (Subtarget.hasSSE1())
return "x";
}
return TargetLowering::LowerXConstraint(ConstraintVT);
}
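// For example, "=X" on a float output is rewritten to "x" (an SSE register)
// when SSE1 is available; otherwise the default handling applies, which for
// FP types generally means 'f' (an x87 stack register).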
// Lower @cc targets via setcc.
SDValue X86TargetLowering::LowerAsmOutputForConstraint(
SDValue &Chain, SDValue &Flag, const SDLoc &DL,
const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
if (Cond == X86::COND_INVALID)
return SDValue();
// Check that return type is valid.
if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
OpInfo.ConstraintVT.getSizeInBits() < 8)
report_fatal_error("Flag output operand is of invalid type");
// Get EFLAGS register. Only update chain when copyfrom is glued.
if (Flag.getNode()) {
Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, Flag);
Chain = Flag.getValue(1);
} else
Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32);
// Extract CC code.
SDValue CC = getSETCC(Cond, Flag, DL, DAG);
// Zero-extend the i8 SETCC result to the constraint's integer type.
SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);
return Result;
}
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1) return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'I':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'J':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 63) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'K':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<8>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'L':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
(Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'M':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 3) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'N':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'O':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 127) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'e': {
// 32-bit signed value
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
// FIXME gcc accepts some relocatable values here too, but only in certain
// memory models; it's complicated.
}
return;
}
case 'Z': {
// 32-bit unsigned value
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
// FIXME gcc accepts some relocatable values here too, but only in certain
// memory models; it's complicated.
return;
}
case 'i': {
// Literal immediates are always ok.
if (auto *CST = dyn_cast<ConstantSDNode>(Op)) {
bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
BooleanContent BCont = getBooleanContents(MVT::i64);
ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
: ISD::SIGN_EXTEND;
int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()
: CST->getSExtValue();
Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64);
break;
}
// In any sort of PIC mode addresses need to be computed at runtime by
// adding in a register or some sort of table lookup. These can't
// be used as immediates. BlockAddresses and BasicBlocks are fine though.
if ((Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC()) &&
!(isa<BlockAddressSDNode>(Op) || isa<BasicBlockSDNode>(Op)))
return;
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
// If we require an extra load to get this address, as in PIC mode, we
// can't accept it.
if (isGlobalStubReference(
Subtarget.classifyGlobalReference(GA->getGlobal())))
return;
break;
}
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
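// Example of one of the immediate constraints handled above: 'N' accepts an
// 8-bit unsigned immediate, so
//   asm volatile("outb %0, %1" : : "a"(value), "N"(0x80));
// lets 0x80 be emitted as a target constant instead of being forced into a
// register.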
/// Check if \p RC is a general purpose register class.
/// I.e., GR* or one of their variants.
static bool isGRClass(const TargetRegisterClass &RC) {
return RC.hasSuperClassEq(&X86::GR8RegClass) ||
RC.hasSuperClassEq(&X86::GR16RegClass) ||
RC.hasSuperClassEq(&X86::GR32RegClass) ||
RC.hasSuperClassEq(&X86::GR64RegClass) ||
RC.hasSuperClassEq(&X86::LOW32_ADDR_ACCESS_RBPRegClass);
}
/// Check if \p RC is a vector register class.
/// I.e., FR* / VR* or one of their variants.
static bool isFRClass(const TargetRegisterClass &RC) {
return RC.hasSuperClassEq(&X86::FR16XRegClass) ||
RC.hasSuperClassEq(&X86::FR32XRegClass) ||
RC.hasSuperClassEq(&X86::FR64XRegClass) ||
RC.hasSuperClassEq(&X86::VR128XRegClass) ||
RC.hasSuperClassEq(&X86::VR256XRegClass) ||
RC.hasSuperClassEq(&X86::VR512RegClass);
}
/// Check if \p RC is a mask register class.
/// I.e., VK* or one of their variants.
static bool isVKClass(const TargetRegisterClass &RC) {
return RC.hasSuperClassEq(&X86::VK1RegClass) ||
RC.hasSuperClassEq(&X86::VK2RegClass) ||
RC.hasSuperClassEq(&X86::VK4RegClass) ||
RC.hasSuperClassEq(&X86::VK8RegClass) ||
RC.hasSuperClassEq(&X86::VK16RegClass) ||
RC.hasSuperClassEq(&X86::VK32RegClass) ||
RC.hasSuperClassEq(&X86::VK64RegClass);
}
std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
// 'A' means [ER]AX + [ER]DX.
case 'A':
if (Subtarget.is64Bit())
return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
"Expecting 64, 32 or 16 bit subtarget");
return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
// TODO: Slight differences here in allocation order and leaving
// RIP in the class. Do they matter any more here than they do
// in the normal allocation?
case 'k':
if (Subtarget.hasAVX512()) {
if (VT == MVT::i1)
return std::make_pair(0U, &X86::VK1RegClass);
if (VT == MVT::i8)
return std::make_pair(0U, &X86::VK8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::VK16RegClass);
}
if (Subtarget.hasBWI()) {
if (VT == MVT::i32)
return std::make_pair(0U, &X86::VK32RegClass);
if (VT == MVT::i64)
return std::make_pair(0U, &X86::VK64RegClass);
}
break;
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32RegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
[[fallthrough]];
// 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_ABCDRegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
return std::make_pair(0U, &X86::GR32_ABCDRegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
return std::make_pair(0U, &X86::GR32RegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64RegClass);
break;
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREXRegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
return std::make_pair(0U, &X86::GR32_NOREXRegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_NOREXRegClass);
break;
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP32RegClass);
if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP64RegClass);
if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80)
return std::make_pair(0U, &X86::RFP80RegClass);
break;
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget.hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'v':
case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
if (!Subtarget.hasSSE1()) break;
bool VConstraint = (Constraint[0] == 'v');
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
case MVT::f16:
if (VConstraint && Subtarget.hasFP16())
return std::make_pair(0U, &X86::FR16XRegClass);
break;
case MVT::f32:
case MVT::i32:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR32XRegClass);
return std::make_pair(0U, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR64XRegClass);
return std::make_pair(0U, &X86::FR64RegClass);
case MVT::i128:
if (Subtarget.is64Bit()) {
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::VR128XRegClass);
return std::make_pair(0U, &X86::VR128RegClass);
}
break;
// Vector types and fp128.
case MVT::v8f16:
if (!Subtarget.hasFP16())
break;
[[fallthrough]];
case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::VR128XRegClass);
return std::make_pair(0U, &X86::VR128RegClass);
// AVX types.
case MVT::v16f16:
if (!Subtarget.hasFP16())
break;
[[fallthrough]];
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64:
case MVT::v8f32:
case MVT::v4f64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::VR256XRegClass);
if (Subtarget.hasAVX())
return std::make_pair(0U, &X86::VR256RegClass);
break;
case MVT::v32f16:
if (!Subtarget.hasFP16())
break;
[[fallthrough]];
case MVT::v64i8:
case MVT::v32i16:
case MVT::v8f64:
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
if (!Subtarget.hasAVX512()) break;
if (VConstraint)
return std::make_pair(0U, &X86::VR512RegClass);
return std::make_pair(0U, &X86::VR512_0_15RegClass);
}
break;
}
} else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
switch (Constraint[1]) {
default:
break;
case 'i':
case 't':
case '2':
return getRegForInlineAsmConstraint(TRI, "x", VT);
case 'm':
if (!Subtarget.hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'z':
if (!Subtarget.hasSSE1()) break;
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
case MVT::f16:
if (!Subtarget.hasFP16())
break;
return std::make_pair(X86::XMM0, &X86::FR16XRegClass);
case MVT::f32:
case MVT::i32:
return std::make_pair(X86::XMM0, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
return std::make_pair(X86::XMM0, &X86::FR64RegClass);
case MVT::v8f16:
if (!Subtarget.hasFP16())
break;
[[fallthrough]];
case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
return std::make_pair(X86::XMM0, &X86::VR128RegClass);
// AVX types.
case MVT::v16f16:
if (!Subtarget.hasFP16())
break;
[[fallthrough]];
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64:
case MVT::v8f32:
case MVT::v4f64:
if (Subtarget.hasAVX())
return std::make_pair(X86::YMM0, &X86::VR256RegClass);
break;
case MVT::v32f16:
if (!Subtarget.hasFP16())
break;
[[fallthrough]];
case MVT::v64i8:
case MVT::v32i16:
case MVT::v8f64:
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
if (Subtarget.hasAVX512())
return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
break;
}
break;
case 'k':
// These register classes don't allocate k0 for masked vector operations;
// k0 is reserved as the "no masking" writemask encoding.
if (Subtarget.hasAVX512()) {
if (VT == MVT::i1)
return std::make_pair(0U, &X86::VK1WMRegClass);
if (VT == MVT::i8)
return std::make_pair(0U, &X86::VK8WMRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::VK16WMRegClass);
}
if (Subtarget.hasBWI()) {
if (VT == MVT::i32)
return std::make_pair(0U, &X86::VK32WMRegClass);
if (VT == MVT::i64)
return std::make_pair(0U, &X86::VK64WMRegClass);
}
break;
}
}
if (parseConstraintCode(Constraint) != X86::COND_INVALID)
return std::make_pair(0U, &X86::GR32RegClass);
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<Register, const TargetRegisterClass*> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
// Only match x87 registers if the VT is one SelectionDAGBuilder can convert
// to/from f80.
if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
// Map st(0) -> st(7) -> ST0
if (Constraint.size() == 7 && Constraint[0] == '{' &&
tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
Constraint[3] == '(' &&
(Constraint[4] >= '0' && Constraint[4] <= '7') &&
Constraint[5] == ')' && Constraint[6] == '}') {
// st(7) is not allocatable and thus not a member of RFP80. Return a
// singleton class in cases where we have a reference to it.
if (Constraint[4] == '7')
return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
return std::make_pair(X86::FP0 + Constraint[4] - '0',
&X86::RFP80RegClass);
}
// GCC allows "st(0)" to be called just plain "st".
if (StringRef("{st}").equals_insensitive(Constraint))
return std::make_pair(X86::FP0, &X86::RFP80RegClass);
}
// flags -> EFLAGS
if (StringRef("{flags}").equals_insensitive(Constraint))
return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
// dirflag -> DF
// Only allow for clobber.
if (StringRef("{dirflag}").equals_insensitive(Constraint) &&
VT == MVT::Other)
return std::make_pair(X86::DF, &X86::DFCCRRegClass);
// fpsr -> FPSW
if (StringRef("{fpsr}").equals_insensitive(Constraint))
return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);
return Res;
}
// Make sure it isn't a register that requires 64-bit mode.
if (!Subtarget.is64Bit() &&
(isFRClass(*Res.second) || isGRClass(*Res.second)) &&
TRI->getEncodingValue(Res.first) >= 8) {
// Register requires REX prefix, but we're in 32-bit mode.
return std::make_pair(0, nullptr);
}
// Make sure it isn't a register that requires AVX512.
if (!Subtarget.hasAVX512() && isFRClass(*Res.second) &&
TRI->getEncodingValue(Res.first) & 0x10) {
// Register requires EVEX prefix.
return std::make_pair(0, nullptr);
}
// Otherwise, check to see if this is a register class of the wrong value
// type. For example, we want to map "{ax},i32" -> {eax}; we don't want it to
// turn into {ax},{dx}.
// MVT::Other is used to specify clobber names.
if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
return Res; // Correct type already, nothing to do.
// Get a matching integer of the correct size. i.e. "ax" with MVT::i32 should
// return "eax". This should even work for things like getting 64-bit integer
// registers when given an f64 type.
const TargetRegisterClass *Class = Res.second;
// The generic code will match the first register class that contains the
// given register. Thus, based on the ordering of the tablegened file,
// the "plain" GR classes might not come first.
// Therefore, use a helper method.
if (isGRClass(*Class)) {
unsigned Size = VT.getSizeInBits();
if (Size == 1) Size = 8;
Register DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
if (DestReg > 0) {
bool is64Bit = Subtarget.is64Bit();
const TargetRegisterClass *RC =
Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
: Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
: Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
: Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr)
: nullptr;
if (Size == 64 && !is64Bit) {
// Model GCC's behavior here and select a fixed pair of 32-bit
// registers.
switch (DestReg) {
case X86::RAX:
return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
case X86::RDX:
return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
case X86::RCX:
return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
case X86::RBX:
return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
case X86::RSI:
return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
case X86::RDI:
return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
case X86::RBP:
return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
default:
return std::make_pair(0, nullptr);
}
}
if (RC && RC->contains(DestReg))
return std::make_pair(DestReg, RC);
return Res;
}
// No register found/type mismatch.
return std::make_pair(0, nullptr);
} else if (isFRClass(*Class)) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
if (VT == MVT::f16)
Res.second = &X86::FR16XRegClass;
else if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32XRegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64XRegClass;
else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT))
Res.second = &X86::VR128XRegClass;
else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT))
Res.second = &X86::VR256XRegClass;
else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
Res.second = &X86::VR512RegClass;
else {
// Type mismatch and not a clobber: return an error.
Res.first = 0;
Res.second = nullptr;
}
} else if (isVKClass(*Class)) {
if (VT == MVT::i1)
Res.second = &X86::VK1RegClass;
else if (VT == MVT::i8)
Res.second = &X86::VK8RegClass;
else if (VT == MVT::i16)
Res.second = &X86::VK16RegClass;
else if (VT == MVT::i32)
Res.second = &X86::VK32RegClass;
else if (VT == MVT::i64)
Res.second = &X86::VK64RegClass;
else {
// Type mismatch and not a clobber: return an error.
Res.first = 0;
Res.second = nullptr;
}
}
return Res;
}
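// For example (a sketch of the size correction above): the constraint "{ax}"
// with VT == MVT::i32 resolves to (X86::EAX, &X86::GR32RegClass) rather than
// staying on 16-bit AX; with a 64-bit VT in 32-bit mode the same constraint
// instead selects the fixed GR32_AD pair class, modeling GCC's behavior.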
bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on x86 is expensive. However, when aggressively optimizing
// for code size, we prefer to use a div instruction, as it is usually smaller
// than the alternative sequence.
// The exception to this is vector division. Since x86 doesn't have vector
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
return OptSize && !VT.isVector();
}
void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
if (!Subtarget.is64Bit())
return;
// Update IsSplitCSR in X86MachineFunctionInfo.
X86MachineFunctionInfo *AFI =
Entry->getParent()->getInfo<X86MachineFunctionInfo>();
AFI->setIsSplitCSR(true);
}
void X86TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (X86::GR64RegClass.contains(*I))
RC = &X86::GR64RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions; it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(
Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
bool X86TargetLowering::supportSwiftError() const {
return Subtarget.is64Bit();
}
/// Returns true if stack probing through a function call is requested.
bool X86TargetLowering::hasStackProbeSymbol(const MachineFunction &MF) const {
return !getStackProbeSymbolName(MF).empty();
}
/// Returns true if stack probing through inline assembly is requested.
bool X86TargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
// No inline stack probes for Windows; they have their own mechanism.
if (Subtarget.isOSWindows() ||
MF.getFunction().hasFnAttribute("no-stack-arg-probe"))
return false;
// If the function specifically requests inline stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
"inline-asm";
return false;
}
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
StringRef
X86TargetLowering::getStackProbeSymbolName(const MachineFunction &MF) const {
// Inline stack probes take precedence over a stack probe call.
if (hasInlineStackProbe(MF))
return "";
// If the function specifically requests stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();
// Generally, if we aren't on Windows, the platform ABI does not include
// support for stack probes, so don't emit them.
if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO() ||
MF.getFunction().hasFnAttribute("no-stack-arg-probe"))
return "";
// We need a stack probe to conform to the Windows ABI. Choose the right
// symbol.
if (Subtarget.is64Bit())
return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
}
unsigned
X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
// The default stack probe size is 4096 if the function has no stackprobesize
// attribute.
return MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size",
4096);
}
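// For example, a function carrying the IR attributes
//   "probe-stack"="inline-asm" "stack-probe-size"="8192"
// gets inline stack probes at 8 KiB intervals instead of a call to the
// platform's __chkstk-style symbol.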
Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
if (ML->isInnermost() &&
ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
return TargetLowering::getPrefLoopAlignment();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index 6da4dd2b942c..888e69ac4ac0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1,13750 +1,13771 @@
//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
string suffix = ""> {
RegisterClass RC = rc;
ValueType EltVT = eltvt;
int NumElts = numelts;
// Corresponding mask register class.
RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
// Corresponding mask register pair class.
RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
!cast<RegisterOperand>("VK" # NumElts # "Pair"));
// Corresponding write-mask register class.
RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// The mask VT.
ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
// Suffix used in the instruction mnemonic.
string Suffix = suffix;
// VTName is a string name for the vector VT. For vector types it is
// v # NumElts # EltVT, so for a vector of 8 elements of i32 it is "v8i32".
// It is slightly more involved for scalar types, where NumElts = 1:
// in that case we build v8f16, v4f32 or v2f64, depending on the element size.
string VTName = "v" # !if (!eq (NumElts, 1),
!if (!eq (EltVT.Size, 16), 8,
!if (!eq (EltVT.Size, 32), 4,
!if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
// The vector VT.
ValueType VT = !cast<ValueType>(VTName);
string EltTypeName = !cast<string>(EltVT);
// Size of the element type in bits, e.g. 32 for v16i32.
string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
int EltSize = EltVT.Size;
// "i" for integer types and "f" for floating-point types
string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));
// Size of RC in bits, e.g. 512 for VR512.
int Size = VT.Size;
// The corresponding memory operand, e.g. i512mem for VR512.
X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
// FP scalar memory operand for intrinsics - ssmem/sdmem.
Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
!if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
!if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
!if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));
// Load patterns
PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
!if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
!if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
!if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
// 8-bit compressed displacement tuple/subvector format. This is only
// defined for NumElts <= 8.
CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
!cast<CD8VForm>("CD8VT" # NumElts), ?);
SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
!if (!eq (Size, 256), sub_ymm, ?));
Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
!if (!eq (EltTypeName, "f64"), SSEPackedDouble,
!if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
!if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
SSEPackedInt))));
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
!if (!eq (EltTypeName, "f16"), FR16X,
!if (!eq (EltTypeName, "bf16"), FR16X,
FR64X)));
dag ImmAllZerosV = (VT immAllZerosV);
string ZSuffix = !if (!eq (Size, 128), "Z128",
!if (!eq (Size, 256), "Z256", "Z"));
}
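// As an illustration, v16i32_info below derives KRC = VK16, KRCWM = VK16WM,
// KVT = v16i1, VTName = "v16i32", MemOp = i512mem, Size = 512 and
// ZSuffix = "Z", all from the single tuple <16, i32, VR512, "d">.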
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
// "x" in v32i8x_info means RC = VR256X
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbf">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to reuse the same masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
def bf16x_info : X86VectorVTInfo<1, bf16, VR128X, "sbf">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
X86VectorVTInfo i128> {
X86VectorVTInfo info512 = i512;
X86VectorVTInfo info256 = i256;
X86VectorVTInfo info128 = i128;
}
def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
v2i64x_info>;
def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
v8f16x_info>;
def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
v8bf16x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
v2f64x_info>;
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
ValueType _vt> {
RegisterClass KRC = _krc;
RegisterClass KRCWM = _krcwm;
ValueType KVT = _vt;
}
def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
(vselect node:$mask, node:$src1, node:$src2), [{
return isProfitableToFormMaskedOp(N);
}]>;
def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
(X86selects node:$mask, node:$src1, node:$src2), [{
return isProfitableToFormMaskedOp(N);
}]>;
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
list<dag> MaskingPattern,
list<dag> ZeroMaskingPattern,
string MaskingConstraint = "",
bit IsCommutable = 0,
bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
string ClobberConstraint = ""> {
let isCommutable = IsCommutable, Constraints = ClobberConstraint in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
Pattern>;
// Prefer over VMOV*rrk Pat<>
let isCommutable = IsKCommutable in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
MaskingPattern>,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
!if(!eq(MaskingConstraint, ""), ClobberConstraint,
!strconcat(ClobberConstraint, ", ", MaskingConstraint)));
}
// Zero-masking does not add any restrictions to the operand-commuting
// transformation, so it is OK to use IsCommutable instead of IsKCommutable.
let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
Constraints = ClobberConstraint in
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
ZeroMaskingPattern>,
EVEX_KZ;
}
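// Roughly, instantiating this multiclass as FOO yields three instructions:
// FOO (the plain form), FOOk (merge-masking, "$dst {${mask}}", EVEX_K) and
// FOOkz (zero-masking, "$dst {${mask}} {z}", EVEX_KZ), with the masked forms
// taking the extra $mask (and, for merging, $src0) operands.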
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
SDPatternOperator Select = vselect_mask,
string MaskingConstraint = "",
bit IsCommutable = 0,
bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
string ClobberConstraint = ""> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst, MaskingRHS)],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
MaskingConstraint, IsCommutable,
IsKCommutable, IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskRHS,
string ClobberConstraint = "",
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable> :
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst,
(vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
[(set _.RC:$dst,
(vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
"$src0 = $dst", IsCommutable, IsKCommutable,
IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
SDPatternOperator Select = vselect_mask,
string ClobberConstraint = ""> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(Select _.KRCWM:$mask, RHS, _.RC:$src0),
Select, "$src0 = $dst", IsCommutable, IsKCommutable,
IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS> :
AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
RHS, 0, 0, 0, X86selects_mask>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
bit IsCommutable = 0,
bit IsKCommutable = 0,
SDPatternOperator Select = vselect_mask,
bit MaskOnly = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm,
!if(MaskOnly, (null_frag), RHS),
(Select _.KRCWM:$mask, RHS, _.RC:$src1),
Select, "", IsCommutable, IsKCommutable>;
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
X86VectorVTInfo InVT,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, bit IsCommutable = 0> :
AVX512_maskable_common<O, F, OutVT, Outs,
!con((ins InVT.RC:$src1), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
(vselect_mask InVT.KRCWM:$mask, RHS,
(bitconvert InVT.RC:$src1)),
vselect_mask, "", IsCommutable>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
bit IsCommutable = 0,
bit IsKCommutable = 0,
bit MaskOnly = 0> :
AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
X86selects_mask, MaskOnly>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern> :
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
"$src0 = $dst">;
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern> :
AVX512_maskable_custom<O, F, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
"">;
// Instructions with a mask that put their result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
dag Outs,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
list<dag> MaskingPattern,
bit IsCommutable = 0> {
let isCommutable = IsCommutable in {
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
Pattern>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
MaskingPattern>, EVEX_K;
}
}
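// Note that, unlike AVX512_maskable_custom above, only the plain and the
// merge-masked ("k") forms are generated here: the destination is a mask
// register, so a separate zero-masking ("kz") variant is not meaningful.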
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
bit IsCommutable = 0> :
AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.KRC:$dst, RHS)],
[(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag RHS_su, bit IsCommutable = 0> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(and _.KRCWM:$mask, RHS_su), IsCommutable>;
// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst, MaskingRHS)],
[(set _.RC:$dst, ZeroMaskingRHS)],
"$src0 = $dst">;
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS, bit IsCommutable,
bit IsKCommutable> :
AVX512_maskable_custom<O, F, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst,
(vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
[(set _.RC:$dst,
(vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
"", IsCommutable, IsKCommutable>;
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
(ins VK16WM:$mask), "",
[(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
(v16i32 immAllOnesV),
(v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
(ins VK8WM:$mask), "",
[(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
(v8i64 immAllOnesV),
(v8i64 immAllZerosV)))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
[(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
[(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}
// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
[(set FR16X:$dst, fp16imm0)]>;
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
[(set FR64X:$dst, fp64imm0)]>;
def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
[(set VR128X:$dst, fp128imm0)]>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
X86VectorVTInfo To,
SDPatternOperator vinsert_insert,
SDPatternOperator vinsert_for_mask,
X86FoldableSchedWrite sched> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
(From.VT From.RC:$src2),
(iPTR imm)),
(vinsert_for_mask:$src3 (To.VT To.RC:$src1),
(From.VT From.RC:$src2),
(iPTR imm))>,
AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
let mayLoad = 1 in
defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
(From.VT (From.LdFrag addr:$src2)),
(iPTR imm)),
(vinsert_for_mask:$src3 (To.VT To.RC:$src1),
(From.VT (From.LdFrag addr:$src2)),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<From.EltSize, From.CD8TupleForm>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
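// For instance, instantiated via vinsert_for_type below as VINSERTF32x4Z256,
// this produces VINSERTF32x4Z256rr / VINSERTF32x4Z256rm plus the masked
// rrk/rrkz and rmk/rmkz forms, i.e. the names the lowering patterns further
// down refer to via !cast<Instruction>(InstrStr#"rr") and friends.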
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
X86VectorVTInfo To,
SDPatternOperator vinsert_insert,
X86FoldableSchedWrite sched> :
vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, PatFrag vinsert_insert,
SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
let Predicates = p in {
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rr")
To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins)))>;
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1),
(From.VT (From.LdFrag addr:$src2)),
(iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rm")
To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins)))>;
}
}
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256,
X86FoldableSchedWrite sched> {
let Predicates = [HasVLX] in
defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
vinsert128_insert, sched>, EVEX_V256;
defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo<16, EltVT32, VR512>,
vinsert128_insert, sched>, EVEX_V512;
defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
vinsert256_insert, sched>, VEX_W, EVEX_V512;
// Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasVLX, HasDQI] in
defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
null_frag, vinsert128_insert, sched>,
VEX_W1X, EVEX_V256;
// Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasDQI] in {
defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
null_frag, vinsert128_insert, sched>,
VEX_W, EVEX_V512;
defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
X86VectorVTInfo< 8, EltVT32, VR256X>,
X86VectorVTInfo<16, EltVT32, VR512>,
null_frag, vinsert256_insert, sched>,
EVEX_V512;
}
}
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
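// Additional patterns for handling a bitcast between the vselect and the
// insert_subvector.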
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, X86VectorVTInfo Cast,
PatFrag vinsert_insert,
SDNodeXForm INSERT_get_vinsert_imm,
list<Predicate> p> {
let Predicates = p in {
def : Pat<(Cast.VT
(vselect_mask Cast.KRCWM:$mask,
(bitconvert
(vinsert_insert:$ins (To.VT To.RC:$src1),
(From.VT From.RC:$src2),
(iPTR imm))),
Cast.RC:$src0)),
(!cast<Instruction>(InstrStr#"rrk")
Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
(vselect_mask Cast.KRCWM:$mask,
(bitconvert
(vinsert_insert:$ins (To.VT To.RC:$src1),
(From.VT
(bitconvert
(From.LdFrag addr:$src2))),
(iPTR imm))),
Cast.RC:$src0)),
(!cast<Instruction>(InstrStr#"rmk")
Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
(vselect_mask Cast.KRCWM:$mask,
(bitconvert
(vinsert_insert:$ins (To.VT To.RC:$src1),
(From.VT From.RC:$src2),
(iPTR imm))),
Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rrkz")
Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
(vselect_mask Cast.KRCWM:$mask,
(bitconvert
(vinsert_insert:$ins (To.VT To.RC:$src1),
(From.VT (From.LdFrag addr:$src2)),
(iPTR imm))),
Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rmkz")
Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
v8f32x_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
v4f64x_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
v8i32x_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
v8i32x_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
v8i32x_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
v4i64x_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
v4i64x_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
v4i64x_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
v16f32_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
v8f64_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
v16i32_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
v16i32_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
v16i32_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
v8i64_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
v8i64_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
v8i64_info, vinsert128_insert,
INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
v16f32_info, vinsert256_insert,
INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
v8f64_info, vinsert256_insert,
INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
v16i32_info, vinsert256_insert,
INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
v16i32_info, vinsert256_insert,
INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
v16i32_info, vinsert256_insert,
INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
v8i64_info, vinsert256_insert,
INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
v8i64_info, vinsert256_insert,
INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
v8i64_info, vinsert256_insert,
INSERT_get_vinsert256_imm, [HasAVX512]>;
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
timm:$src3))]>,
EVEX_4V, EVEX_CD8<32, CD8VT1>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---
// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
X86VectorVTInfo From, X86VectorVTInfo To,
SDPatternOperator vextract_extract,
SDPatternOperator vextract_for_mask,
SchedWrite SchedRR, SchedWrite SchedMR> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
(ins From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts,
"$idx, $src1", "$src1, $idx",
(vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
(vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst|$dst, $src1, $idx}",
[(store (To.VT (vextract_extract:$idx
(From.VT From.RC:$src1), (iPTR imm))),
addr:$dst)]>, EVEX,
Sched<[SchedMR]>;
let mayStore = 1, hasSideEffects = 0 in
def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, To.KRCWM:$mask,
From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst {${mask}}|"
"$dst {${mask}}, $src1, $idx}", []>,
EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
}
}
// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
X86VectorVTInfo To,
SDPatternOperator vextract_extract,
SchedWrite SchedRR, SchedWrite SchedMR> :
vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, PatFrag vextract_extract,
SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
let Predicates = p in {
def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rr")
From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
(iPTR imm))), addr:$dst),
(!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
}
}
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256,
SchedWrite SchedRR, SchedWrite SchedMR> {
let Predicates = [HasAVX512] in {
defm NAME # "32x4Z" : vextract_for_size<Opcode128,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
vextract128_extract, SchedRR, SchedMR>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm NAME # "64x4Z" : vextract_for_size<Opcode256,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
vextract256_extract, SchedRR, SchedMR>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
}
let Predicates = [HasVLX] in
defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT32, VR256X>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
vextract128_extract, SchedRR, SchedMR>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
// Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasVLX, HasDQI] in
defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
null_frag, vextract128_extract, SchedRR, SchedMR>,
VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasDQI] in {
defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
null_frag, vextract128_extract, SchedRR, SchedMR>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
null_frag, vextract256_extract, SchedRR, SchedMR>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
}
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
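// The two defms above expand vextract_for_type into the VEXTRACT{F,I}32x4Z,
// VEXTRACT{F,I}64x4Z, VEXTRACT{F,I}32x4Z256, VEXTRACT{F,I}64x2Z256,
// VEXTRACT{F,I}64x2Z and VEXTRACT{F,I}32x8Z instruction groups.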
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
(v2i64 (VEXTRACTI128rr
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
(v2f64 (VEXTRACTF128rr
(v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
(v4i32 (VEXTRACTI128rr
(v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
(v4f32 (VEXTRACTF128rr
(v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
(v8i16 (VEXTRACTI128rr
(v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
(v8f16 (VEXTRACTF128rr
(v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
(v16i8 (VEXTRACTI128rr
(v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
(iPTR 1)))>;
}
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
(v2i64 (VEXTRACTI32x4Z256rr
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
(v2f64 (VEXTRACTF32x4Z256rr
(v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
(v4i32 (VEXTRACTI32x4Z256rr
(v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
(v4f32 (VEXTRACTF32x4Z256rr
(v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
(v8i16 (VEXTRACTI32x4Z256rr
(v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
(v8f16 (VEXTRACTF32x4Z256rr
(v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
(iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
(v16i8 (VEXTRACTI32x4Z256rr
(v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
(iPTR 1)))>;
}
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, X86VectorVTInfo Cast,
PatFrag vextract_extract,
SDNodeXForm EXTRACT_get_vextract_imm,
list<Predicate> p> {
let Predicates = p in {
def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
(bitconvert
(To.VT (vextract_extract:$ext
(From.VT From.RC:$src), (iPTR imm)))),
To.RC:$src0)),
(Cast.VT (!cast<Instruction>(InstrStr#"rrk")
Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
(bitconvert
(To.VT (vextract_extract:$ext
(From.VT From.RC:$src), (iPTR imm)))),
Cast.ImmAllZerosV)),
(Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
Cast.KRCWM:$mask, From.RC:$src,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
v4f32x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
v2f64x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
v4i32x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
v4i32x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
v4i32x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
v2i64x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
v2i64x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
v2i64x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
v4f32x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
v2f64x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
v4i32x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
v4i32x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
v4i32x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
v2i64x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
v2i64x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
v2i64x_info, vextract128_extract,
EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
v8f32x_info, vextract256_extract,
EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
v4f64x_info, vextract256_extract,
EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
v8i32x_info, vextract256_extract,
EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
v8i32x_info, vextract256_extract,
EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
v8i32x_info, vextract256_extract,
EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
v4i64x_info, vextract256_extract,
EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
v4i64x_info, vextract256_extract,
EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
v4i64x_info, vextract256_extract,
EXTRACT_get_vextract256_imm, [HasAVX512]>;
// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
(ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
(ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>,
EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
(!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
(SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
(X86VBroadcast SrcInfo.FRC:$src),
DestInfo.RC:$src0)),
(!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
(SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
(X86VBroadcast SrcInfo.FRC:$src),
DestInfo.ImmAllZerosV)),
(!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo MaskInfo,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo,
bit IsConvertibleToThreeAddress,
SDPatternOperator UnmaskedOp = X86VBroadcast,
SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
let hasSideEffects = 0 in
def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set MaskInfo.RC:$dst,
(MaskInfo.VT
(bitconvert
(DestInfo.VT
(UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
(ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
"${dst} {${mask}} {z}, $src}"),
[(set MaskInfo.RC:$dst,
(vselect_mask MaskInfo.KRCWM:$mask,
(MaskInfo.VT
(bitconvert
(DestInfo.VT
(X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
MaskInfo.ImmAllZerosV))],
DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
(ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
SrcInfo.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
"${dst} {${mask}}, $src}"),
[(set MaskInfo.RC:$dst,
(vselect_mask MaskInfo.KRCWM:$mask,
(MaskInfo.VT
(bitconvert
(DestInfo.VT
(X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
MaskInfo.RC:$src0))],
DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
(ins SrcInfo.ScalarMemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set MaskInfo.RC:$dst,
(MaskInfo.VT
(bitconvert
(DestInfo.VT
(UnmaskedBcastOp addr:$src)))))],
DestInfo.ExeDomain>, T8PD, EVEX,
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
(ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
"${dst} {${mask}} {z}, $src}"),
[(set MaskInfo.RC:$dst,
(vselect_mask MaskInfo.KRCWM:$mask,
(MaskInfo.VT
(bitconvert
(DestInfo.VT
(SrcInfo.BroadcastLdFrag addr:$src)))),
MaskInfo.ImmAllZerosV))],
DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
let Constraints = "$src0 = $dst",
isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
(ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
SrcInfo.ScalarMemOp:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
"${dst} {${mask}}, $src}"),
[(set MaskInfo.RC:$dst,
(vselect_mask MaskInfo.KRCWM:$mask,
(MaskInfo.VT
(bitconvert
(DestInfo.VT
(SrcInfo.BroadcastLdFrag addr:$src)))),
MaskInfo.RC:$src0))],
DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
// Helper class to force the mask and broadcast result to the same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo,
bit IsConvertibleToThreeAddress> :
avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
DestInfo, DestInfo, SrcInfo,
IsConvertibleToThreeAddress>;
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info512, _.info128, 1>,
avx512_broadcast_scalar<NAME, _.info512, _.info128>,
EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info256, _.info128, 1>,
avx512_broadcast_scalar<NAME, _.info256, _.info128>,
EVEX_V256;
}
}
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info512, _.info128, 1>,
avx512_broadcast_scalar<NAME, _.info512, _.info128>,
EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info256, _.info128, 1>,
avx512_broadcast_scalar<NAME, _.info256, _.info128>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info128, _.info128, 1>,
avx512_broadcast_scalar<NAME, _.info128, _.info128>,
EVEX_V128;
}
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
avx512vl_f64_info>, VEX_W1X;
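// Note: only the "ss" multiclass provides a Z128 variant; the "sd" multiclass
// defines Z (512-bit) and Z256 forms only.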
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC> {
// Fold with a mask even if it has multiple uses since it is cheap.
let ExeDomain = _.ExeDomain in
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins SrcRC:$src),
"vpbroadcast"#_.Suffix, "$src", "$src",
(_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
/*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
T8PD, EVEX, Sched<[SchedRR]>;
}
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg> {
let hasSideEffects = 0, ExeDomain = _.ExeDomain in
defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
(outs _.RC:$dst), (ins GR32:$src),
!con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
!con((ins _.KRCWM:$mask), (ins GR32:$src)),
"vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
"$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
def : Pat <(_.VT (OpNode SrcRC:$src)),
(!cast<Instruction>(Name#rr)
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Fold with a mask even if it has multiple uses since it is cheap.
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
(!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
(!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
OpNode, SrcRC, Subreg>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
_.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
_.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
}
}
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
SDPatternOperator OpNode,
RegisterClass SrcRC, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
SrcRC>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
SrcRC>, EVEX_V256;
defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
SrcRC>, EVEX_V128;
}
}
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
X86VBroadcast, GR64, HasAVX512>, VEX_W;
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
bit IsConvertibleToThreeAddress> {
let Predicates = [prd] in {
defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _.info512, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _.info256, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
WriteShuffleXLd, _.info128, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V128;
}
}
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
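// Only the dword/qword broadcasts pass IsConvertibleToThreeAddress = 1; the
// byte/word forms are not marked convertible.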
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86VectorVTInfo _Dst,
X86VectorVTInfo _Src> {
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(_Dst.VT (OpNode addr:$src))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86VectorVTInfo _Dst,
X86VectorVTInfo _Src> {
let hasSideEffects = 0, mayLoad = 1 in
defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(null_frag),
(_Dst.VT (OpNode addr:$src))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
let Predicates = [HasBWI] in {
def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
(VPBROADCASTWZrm addr:$src)>;
def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
(VPBROADCASTWZrr VR128X:$src)>;
def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
(VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
(VPBROADCASTWZ128rm addr:$src)>;
def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
(VPBROADCASTWZ256rm addr:$src)>;
def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
(VPBROADCASTWZ128rr VR128X:$src)>;
def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
(VPBROADCASTWZ256rr VR128X:$src)>;
def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
(VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
(VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
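// The patterns above select the integer VPBROADCASTW instructions for f16
// element broadcasts.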
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
(v16f32 immAllZerosV)),
(VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
(VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
(v16i32 immAllZerosV)),
(VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
(VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
(v8f64 immAllZerosV)),
(VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
(v8i64 immAllZerosV)),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
(VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
(v8f32 immAllZerosV)),
(VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
(VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
(v8i32 immAllZerosV)),
(VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
(VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
(v4f64 immAllZerosV)),
(VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
(v4i64 immAllZerosV)),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
(VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
(v16f32 immAllZerosV)),
(VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
(VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
(v16i32 immAllZerosV)),
(VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
(VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
(v8f64 immAllZerosV)),
(VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
(v8i64 immAllZerosV)),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
(VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _Dst,
AVX512VLVectorVTInfo _Src> {
let Predicates = [HasDQI] in
defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _Dst.info512,
_Src.info512, _Src.info128, 0, null_frag, null_frag>,
EVEX_V512;
let Predicates = [HasDQI, HasVLX] in
defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _Dst.info256,
_Src.info256, _Src.info128, 0, null_frag, null_frag>,
EVEX_V256;
}
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _Dst,
AVX512VLVectorVTInfo _Src> :
avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
let Predicates = [HasDQI, HasVLX] in
defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
WriteShuffleXLd, _Dst.info128,
_Src.info128, _Src.info128, 0, null_frag, null_frag>,
EVEX_V128;
}
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
avx512vl_f32_info, avx512vl_f64_info>;
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, RegisterClass KRC> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
EVEX, Sched<[WriteShuffle]>;
}
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
let Predicates = [HasCDI] in
defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
let Predicates = [HasCDI, HasVLX] in {
defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
}
}
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
avx512vl_i64_info, VK8>, VEX_W;
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
hasSideEffects = 0 in {
defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
EVEX_4V, AVX5128IBase, Sched<[sched]>;
let mayLoad = 1 in
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
(_.VT (_.LdFrag addr:$src3)))), 1>,
EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
hasSideEffects = 0, mayLoad = 1 in
defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src2,
IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo ShuffleMask> {
defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
ShuffleMask.info512>,
avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
ShuffleMask.info128>,
avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
ShuffleMask.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
ShuffleMask.info256>,
avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
ShuffleMask.info256>, EVEX_V256;
}
}
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
Idx.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
Idx.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
Idx.info256>, EVEX_V256;
}
}
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Extra patterns to handle the additional bitcasts that arise when the passthru
// and index operands have different types in the FP versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
X86VectorVTInfo IdxVT,
X86VectorVTInfo CastVT> {
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(X86VPermt2 (_.VT _.RC:$src2),
(IdxVT.VT (bitconvert
(CastVT.VT _.RC:$src1))),
_.RC:$src3),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3)>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(X86VPermt2 _.RC:$src2,
(IdxVT.VT (bitconvert
(CastVT.VT _.RC:$src1))),
(_.LdFrag addr:$src3)),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(X86VPermt2 _.RC:$src2,
(IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
(_.BroadcastLdFrag addr:$src3)),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
}
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
EVEX_4V, AVX5128IBase, Sched<[sched]>;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
(_.LdFrag addr:$src3))), 1>,
EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src1,
IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo ShuffleMask> {
defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
ShuffleMask.info512>,
avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
ShuffleMask.info128>,
avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
ShuffleMask.info128>, EVEX_V128;
defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
ShuffleMask.info256>,
avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
ShuffleMask.info256>, EVEX_V256;
}
}
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx, Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
Idx.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
Idx.info128>, EVEX_V128;
defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
Idx.info256>, EVEX_V256;
}
}
defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
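// In the VPERMI2* forms the tied operand ($src1 = $dst) is the index vector,
// whereas in the VPERMT2* forms it is the first data (table) operand.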
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
EVEX_4V, Sched<[sched]>;
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, Sched<[sched]>;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
}
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo> {
defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
EVEX_V256;
defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
EVEX_V128;
}
}
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasBWI] in
defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
EVEX_V256;
defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
EVEX_V128;
}
}
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
avx512vl_i16_info>, VEX_W;
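// blendmask_bw omits the WriteFVarBlendask_rmb (embedded-broadcast) forms since
// AVX-512 embedded broadcasts only support 32- and 64-bit elements.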
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
PatFrag OpNode_su, PatFrag OpNodeSAE_su,
X86FoldableSchedWrite sched> {
defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
(OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc),
(OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>,
EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
let isCodeGenOnly = 1 in {
let isCommutable = 1 in
def rr : AVX512Ii8<0xC2, MRMSrcReg,
(outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
!strconcat("vcmp", _.Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
timm:$cc))]>,
EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vcmp", _.Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
timm:$cc))]>,
EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(X86cmpms node:$src1, node:$src2, node:$cc), [{
return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
return N->hasOneUse();
}]>;
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XSIi8Base;
let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
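// Illustrative note: the scalar compares above write a single-bit mask, e.g.
// "vcmpss k1, xmm2, xmm3, 2" (Intel syntax) sets k1[0] = (xmm2[0] <= xmm3[0]);
// imm 2 selects the LE_OS predicate, and the rrb_Int forms add {sae} to
// suppress floating-point exceptions during the comparison.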
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, bit IsCommutable> {
let isCommutable = IsCommutable, hasSideEffects = 0 in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V, Sched<[sched]>;
let mayLoad = 1, hasSideEffects = 0 in
def rm : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable, hasSideEffects = 0 in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, Sched<[sched]>;
let mayLoad = 1, hasSideEffects = 0 in
def rmk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
bit IsCommutable> :
avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
let mayLoad = 1, hasSideEffects = 0 in {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_K, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
// This fragment treats ISD::SETCC as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(setcc node:$src1, node:$src2, SETGT)>;
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
EVEX_CD8<32, CD8VF>;
defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
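// Illustrative note: these defms produce mask-generating integer compares,
// e.g. "vpcmpeqd k1 {k2}, zmm0, zmm1" (Intel syntax) sets
// k1[i] = k2[i] & (zmm0[i] == zmm1[i]); the EQ forms are commutable so a load
// can be folded from either operand.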
def X86pcmpm_imm : SDNodeXForm<setcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
uint8_t SSECC = X86::getVPCMPImmForCond(CC);
return getI8Imm(SSECC, SDLoc(N));
}]>;
// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
uint8_t SSECC = X86::getVPCMPImmForCond(CC);
SSECC = X86::getSwappedVPCMPImm(SSECC);
return getI8Imm(SSECC, SDLoc(N));
}]>;
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name> {
let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
cond)))]>,
EVEX_4V, Sched<[sched]>;
def rmi : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)),
cond)))]>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched]>;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT
(Frag_su:$cc
(_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)),
cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi")
_.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
def : Pat<(and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmik")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
(X86pcmpm_imm_commute $cc))>;
}
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name> :
avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[(set _.KRC:$dst, (_.KVT (Frag:$cc
(_.VT _.RC:$src1),
(_.BroadcastLdFrag addr:$src2),
cond)))]>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc
(_.VT _.RC:$src1),
(_.BroadcastLdFrag addr:$src2),
cond))))]>,
EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmib")
_.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
def : Pat<(and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmibk")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
(X86pcmpm_imm_commute $cc))>;
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
}
}
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
}
}
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
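// Illustrative note: for VPCMP/VPCMPU the immediate selects the predicate
// (0=EQ, 1=LT, 2=LE, 4=NE, 5=NLT, 6=NLE; the U forms compare unsigned), e.g.
// "vpcmpd k1, zmm0, zmm1, 1" is a signed less-than and
// "vpcmpud k1, zmm0, zmm1, 6" is an unsigned greater-than (Intel syntax).
// X86pcmpm_imm derives this immediate from the ISD::SETCC condition code.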
def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(X86cmpm node:$src1, node:$src2, node:$cc), [{
return N->hasOneUse();
}]>;
def X86cmpm_imm_commute : SDNodeXForm<timm, [{
uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
return getI8Imm(Imm, SDLoc(N));
}]>;
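// Illustrative note: when the load ends up in the first operand, the patterns
// below swap the operands and use the swapped VCMP predicate (e.g. LT_OS
// becomes GT_OS) so the memory operand can still be folded.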
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1>, Sched<[sched]>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
timm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, ${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr#", $cc",
(X86any_cmpm (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
timm:$cc)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Patterns for selecting with loads in the other operand.
def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
timm:$cc),
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
(_.VT _.RC:$src1),
timm:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), timm:$cc),
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1),
timm:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
// Patterns for mask intrinsics.
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
_.RC:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
_.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
addr:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
_.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
addr:$src2, timm:$cc)>;
// Patterns for mask intrinsics with loads in the other operand.
def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
_.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
_.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
}
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
// comparison code form (VCMP[EQ/LT/LE/...])
let Uses = [MXCSR] in
defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc",
[(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
[(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
EVEX_B, Sched<[sched]>;
}
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
Predicate Pred = HasAVX512> {
let Predicates = [Pred] in {
defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
}
let Predicates = [Pred,HasVLX] in {
defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
}
}
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
(VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
(VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}
let Predicates = [HasFP16] in {
def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
(VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}
// ----------------------------------------------------------------
// FPClass
def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
(X86Vfpclasss node:$src1, node:$src2), [{
return N->hasOneUse();
}]>;
def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
(X86Vfpclass node:$src1, node:$src2), [{
return N->hasOneUse();
}]>;
// Handle the scalar fpclass instruction: mask = fpclass(reg_scalar, imm)
//                                               fpclass(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
Predicate prd> {
let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.VT _.RC:$src1),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
(X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
(i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
// Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
//                                               fpclass(mem_vec, imm)
//                                               fpclass(broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string mem>{
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclass_su (_.VT _.RC:$src1),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.LdFrag addr:$src1)),
(i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.LdFrag addr:$src1)),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
_.BroadcastStr#", $dst|$dst, ${src1}"
#_.BroadcastStr#", $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2)))]>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
_.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
_.BroadcastStr#", $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2))))]>,
EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Allow registers or broadcast with the x, y, z suffix we use to disambiguate
// the memory form.
def : InstAlias<OpcodeStr#_.Suffix#mem#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>(NAME#"rr")
_.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
def : InstAlias<OpcodeStr#_.Suffix#mem#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
(!cast<Instruction>(NAME#"rrk")
_.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
def : InstAlias<OpcodeStr#_.Suffix#mem#
"\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
_.BroadcastStr#", $src2}",
(!cast<Instruction>(NAME#"rmb")
_.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
def : InstAlias<OpcodeStr#_.Suffix#mem#
"\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
"$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
(!cast<Instruction>(NAME#"rmbk")
_.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, X86SchedWriteWidths sched,
Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
_.info512, "z">, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
_.info128, "x">, EVEX_V128;
defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
_.info256, "y">, EVEX_V256;
}
}
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
bits<8> opcScalar, X86SchedWriteWidths sched> {
defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
sched, HasFP16>,
EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
sched.Scl, f16x_info, HasFP16>,
EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
sched, HasDQI>,
EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
sched, HasDQI>,
EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
sched.Scl, f32x_info, HasDQI>, VEX_LIG,
EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
sched.Scl, f64x_info, HasDQI>, VEX_LIG,
EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
}
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
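// Illustrative note: vfpclass tests each element against the categories
// selected by the immediate (bit 0 = QNaN, 1 = +0, 2 = -0, 3 = +Inf,
// 4 = -Inf, 5 = denormal, 6 = finite negative, 7 = SNaN), e.g.
// "vfpclassps k1, zmm0, 0x81" (Intel syntax) sets k1[i] for NaN elements.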
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
ValueType vvt, X86MemOperand x86memop> {
let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (vvt (load addr:$src)))]>,
Sched<[WriteLoad]>;
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store KRC:$src, addr:$dst)]>,
Sched<[WriteStore]>;
}
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
string OpcodeStr,
RegisterClass KRC, RegisterClass GRC> {
let hasSideEffects = 0 in {
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
}
}
let Predicates = [HasDQI] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
let Predicates = [HasAVX512] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
let Predicates = [HasBWI] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
VEX, PD, VEX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
VEX, XD;
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
VEX, PS, VEX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, VEX_W;
}
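// Illustrative note: kmov moves masks between k registers, memory and GPRs,
// e.g. (Intel syntax)
//   kmovw k1, eax      ; GR32 -> 16-bit mask
//   kmovw eax, k1      ; 16-bit mask -> GR32
//   kmovq k2, [rdi]    ; 64-bit mask load (requires BWI)
// The byte form requires DQI; the dword and qword forms require BWI.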
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
(COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
(COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
(COPY_TO_REGCLASS VK64:$src, GR64)>;
// Load/store kreg
let Predicates = [HasDQI] in {
def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
def : Pat<(v2i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
def : Pat<(v4i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
(KMOVWkm addr:$src)>;
}
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
SDTCVecEltisVT<1, i1>,
SDTCisPtrTy<2>]>>;
let Predicates = [HasAVX512] in {
multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
(COPY_TO_REGCLASS GR32:$src, maskRC)>;
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
(i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
}
defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
(KMOVWkr (AND32ri8
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
(i32 1)))>;
}
// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
X86FoldableSchedWrite sched, Predicate prd> {
let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (OpNode KRC:$src))]>,
Sched<[sched]>;
}
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86FoldableSchedWrite sched> {
defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
sched, HasDQI>, VEX, PD;
defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
sched, HasAVX512>, VEX, PS;
defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
sched, HasBWI>, VEX, PD, VEX_W;
defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
sched, HasBWI>, VEX, PS, VEX_W;
}
// TODO - do we need an X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
// KNL does not support KMOVB, so the 8-bit mask is promoted to 16-bit.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
def : Pat<(vnot VK4:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
X86FoldableSchedWrite sched, Predicate prd,
bit IsCommutable> {
let Predicates = [prd], isCommutable = IsCommutable in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
Sched<[sched]>;
}
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86FoldableSchedWrite sched, bit IsCommutable,
Predicate prdW = HasAVX512> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
// TODO - do we need an X86SchedWriteWidths::KMASK type?
defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
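// Illustrative note: these operate bitwise on whole mask registers, e.g.
// (Intel syntax) "kandnw k1, k2, k3" computes k1 = ~k2 & k3 and
// "kxnorw k1, k2, k3" computes k1 = ~(k2 ^ k3); kadd (DQI) adds the masks as
// integers, e.g. "kaddw k1, k2, k2" shifts a mask left by one bit.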
multiclass avx512_binop_pat<SDPatternOperator VOpNode,
Instruction Inst> {
// With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with the DQI
// set, the 8-bit type is legal and the KxxxB instructions are used.
let Predicates = [NoDQI] in
def : Pat<(VOpNode VK8:$src1, VK8:$src2),
(COPY_TO_REGCLASS
(Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
(COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
// All types smaller than 8 bits require conversion anyway
def : Pat<(VOpNode VK1:$src1, VK1:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK1:$src1, VK16),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
def : Pat<(VOpNode VK2:$src1, VK2:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK2:$src1, VK16),
(COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
def : Pat<(VOpNode VK4:$src1, VK4:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK4:$src1, VK16),
(COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
defm : avx512_binop_pat<and, KANDWrr>;
defm : avx512_binop_pat<vandn, KANDNWrr>;
defm : avx512_binop_pat<or, KORWrr>;
defm : avx512_binop_pat<vxnor, KXNORWrr>;
defm : avx512_binop_pat<xor, KXORWrr>;
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
(ins Src.KRC:$src1, Src.KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V, VEX_L, Sched<[sched]>;
def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
(!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
}
}
defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
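// Illustrative note: "kunpckbw k1, k2, k3" (Intel syntax) writes k3[7:0] to
// k1[7:0] and k2[7:0] to k1[15:8], i.e. the low half of the result comes from
// the second source; this is why the concat_vectors pattern above passes
// $src2, $src1 in that order.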
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, X86FoldableSchedWrite sched,
Predicate prd> {
let Predicates = [prd], Defs = [EFLAGS] in
def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
Sched<[sched]>;
}
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
Predicate prdW = HasAVX512> {
defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
VEX, PD;
defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
VEX, PS;
defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
VEX, PS, VEX_W;
defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
VEX, PD, VEX_W;
}
// TODO - do we need an X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
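// Illustrative note: these compares only set EFLAGS, e.g. (Intel syntax)
//   kortestw k1, k2   ; ZF = ((k1 | k2) == 0), CF = ((k1 | k2) == all ones)
//   ktestw   k1, k2   ; ZF = ((k1 & k2) == 0), CF = ((~k1 & k2) == 0)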
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, X86FoldableSchedWrite sched> {
let Predicates = [HasAVX512] in
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
Sched<[sched]>;
}
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched> {
defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
sched>, VEX, TAPD, VEX_W;
let Predicates = [HasDQI] in
defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
sched>, VEX, TAPD;
let Predicates = [HasBWI] in {
defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
sched>, VEX, TAPD, VEX_W;
defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
sched>, VEX, TAPD;
}
}
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
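// Illustrative note: the shift count is an immediate and vacated bits are
// zero-filled, e.g. "kshiftrw k1, k2, 4" (Intel syntax) moves bit 4 of k2
// into bit 0 of k1.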
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), cond)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
(X86pcmpm_imm $cc)), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2),
cond)))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
(X86pcmpm_imm $cc)), Narrow.KRC)>;
}
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.BroadcastLdFrag addr:$src2), cond)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmib")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(Narrow.KVT
(Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.BroadcastLdFrag addr:$src2),
cond)))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Commuted with broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
(Narrow.VT Narrow.RC:$src1),
cond)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmib")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(Narrow.KVT
(Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
(Narrow.VT Narrow.RC:$src1),
cond)))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}
// Same as above, but for fp types which don't use PatFrags.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
timm:$cc), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(X86cmpm_su (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
timm:$cc), Narrow.KRC)>;
// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
(Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, timm:$cc), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(X86cmpm_su (Narrow.VT Narrow.RC:$src1),
(Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, timm:$cc), Narrow.KRC)>;
// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
(Narrow.VT Narrow.RC:$src1), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
(Narrow.VT Narrow.RC:$src1), timm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}
let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}
let Predicates = [HasBWI, NoVLX] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
let Predicates = [HasAVX512] in
let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
SchedRW = [WriteZero] in
def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
[(set KRC:$dst, (VT Val))]>;
}
multiclass avx512_mask_setop_w<SDPatternOperator Val> {
defm W : avx512_mask_setop<VK16, v16i1, Val>;
defm D : avx512_mask_setop<VK32, v32i1, Val>;
defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
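// Illustrative note: KSET0*/KSET1* are rematerializable pseudos; they are
// expanded after register allocation (outside this file) into a kxor/kxnor
// of a mask register with itself to materialize the all-zeros or all-ones
// mask.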
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT> {
def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
(subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
(VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
bit NoRMPattern = 0,
SDPatternOperator SelectOprr = vselect> {
let hasSideEffects = 0 in {
let isMoveReg = 1 in
def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
_.ExeDomain>, EVEX, Sched<[Sched.RR]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
"${dst} {${mask}} {z}, $src}"),
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src),
_.ImmAllZerosV)))], _.ExeDomain>,
EVEX, EVEX_KZ, Sched<[Sched.RR]>;
let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
!if(NoRMPattern, [],
[(set _.RC:$dst,
(_.VT (ld_frag addr:$src)))]),
_.ExeDomain>, EVEX, Sched<[Sched.RM]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src1),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K, Sched<[Sched.RR]>;
def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT
(vselect_mask _.KRCWM:$mask,
(_.VT (ld_frag addr:$src1)),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K, Sched<[Sched.RM]>;
}
def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
"${dst} {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
(_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
_.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
(!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
(!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
_.KRCWM:$mask, addr:$ptr)>;
}
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
X86SchedWriteMoveLSWidths Sched,
string EVEX2VEXOvrd, bit NoRMPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
_.info512.AlignedLdFrag, masked_load_aligned,
Sched.ZMM, "", NoRMPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
_.info256.AlignedLdFrag, masked_load_aligned,
Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
_.info128.AlignedLdFrag, masked_load_aligned,
Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
}
}
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
X86SchedWriteMoveLSWidths Sched,
string EVEX2VEXOvrd, bit NoRMPattern = 0,
SDPatternOperator SelectOprr = vselect> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
masked_load, Sched.ZMM, "",
NoRMPattern, SelectOprr>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
NoRMPattern, SelectOprr>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
masked_load, Sched.XMM, EVEX2VEXOvrd,
NoRMPattern, SelectOprr>, EVEX_V128;
}
}
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
bit NoMRPattern = 0> {
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
let isMoveReg = 1 in
def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[], _.ExeDomain>, EVEX,
FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
"${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K,
FoldGenData<BaseName#_.ZSuffix#rrk>,
Sched<[Sched.RR]>;
def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
"${dst} {${mask}} {z}, $src}",
[], _.ExeDomain>, EVEX, EVEX_KZ,
FoldGenData<BaseName#_.ZSuffix#rrkz>,
Sched<[Sched.RR]>;
}
let hasSideEffects = 0, mayStore = 1 in
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
!if(NoMRPattern, [],
[(st_frag (_.VT _.RC:$src), addr:$dst)]),
_.ExeDomain>, EVEX, Sched<[Sched.MR]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
NotMemoryFoldable;
def : Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
(!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
_.KRCWM:$mask, _.RC:$src)>;
def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
_.RC:$dst, _.RC:$src), 0>;
def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
(!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
_.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
(!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
_.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
X86SchedWriteMoveLSWidths Sched,
string EVEX2VEXOvrd, bit NoMRPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
masked_store, Sched.ZMM, "",
NoMRPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
masked_store, Sched.YMM,
EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
masked_store, Sched.XMM, EVEX2VEXOvrd,
NoMRPattern>, EVEX_V128;
}
}
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
X86SchedWriteMoveLSWidths Sched,
string EVEX2VEXOvrd, bit NoMRPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
masked_store_aligned, Sched.ZMM, "",
NoMRPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
masked_store_aligned, Sched.YMM,
EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
NoMRPattern>, EVEX_V128;
}
}
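// Note on naming: each *_vl multiclass above expands into Z/Z256/Z128 variants,
// so e.g. the "VMOVAPS" instantiation below yields VMOVAPSZrm, VMOVAPSZ256rm
// and VMOVAPSZ128rm (plus the rr/rmk/rmkz forms from avx512_load/avx512_store).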
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
PS, EVEX_CD8<32, CD8VF>;
defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPS">,
PS, EVEX_CD8<32, CD8VF>;
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPD">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512, SchedWriteVecMoveLS,
"VMOVDQA", 1>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
HasAVX512, SchedWriteVecMoveLS,
"VMOVDQA", 1>,
PD, EVEX_CD8<32, CD8VF>;
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
HasAVX512, SchedWriteVecMoveLS,
"VMOVDQA">,
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
HasAVX512, SchedWriteVecMoveLS,
"VMOVDQA">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
XD, EVEX_CD8<8, CD8VF>;
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
XD, VEX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
XS, EVEX_CD8<32, CD8VF>;
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU">,
XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
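// Roughly: a VMOVAPSZ128rm_NOVLX load into xmm16 is rewritten post-RA into a
// full-width load of the covering ZMM register (aligned or unaligned ZMM move
// as appropriate), since only the 512-bit forms are legal without VLX.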
let isReMaterializable = 1, canFoldAsLoad = 1,
isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
"", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
"", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
"", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
"", []>, Sched<[WriteFLoadY]>;
}
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
"", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
"", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
"", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
"", []>, Sched<[WriteFStoreY]>;
}
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
(v8i64 immAllZerosV),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
(v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
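// Illustrative: vselect(M, 0, X) above becomes "knotw; vmovdqa32/64 {k}{z}",
// while the vnot forms map an already-inverted mask straight onto the masked
// move, avoiding a redundant knot.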
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
Narrow.RC:$src1, Narrow.RC:$src0)),
(EXTRACT_SUBREG
(Wide.VT
(!cast<Instruction>(InstrStr#"rrk")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
(COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
Narrow.SubRegIdx)>;
def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
Narrow.RC:$src1, Narrow.ImmAllZerosV)),
(EXTRACT_SUBREG
(Wide.VT
(!cast<Instruction>(InstrStr#"rrkz")
(COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
Narrow.SubRegIdx)>;
}
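// Illustrative expansion of the multiclass above for a v8f32 select: insert
// the 256-bit operands into v16f32 via INSERT_SUBREG, perform the masked move
// with the mask copied into VK16WM, then EXTRACT_SUBREG the low 256 bits.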
// Patterns for handling masked selects of 128-bit and 256-bit vectors when
// VLX isn't available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
let Predicates = [HasBWI, NoVLX] in {
defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
}
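// In the load/store patterns below, the element type of a full-width move is
// irrelevant: integer vectors all reuse the qword forms (VMOVDQA64/VMOVDQU64),
// while f16/bf16 vectors reuse VMOVAPS/VMOVUPS, presumably to keep them in the
// FP execution domain.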
let Predicates = [HasAVX512] in {
// 512-bit load.
def : Pat<(alignedloadv16i32 addr:$src),
(VMOVDQA64Zrm addr:$src)>;
def : Pat<(alignedloadv32i16 addr:$src),
(VMOVDQA64Zrm addr:$src)>;
def : Pat<(alignedloadv32f16 addr:$src),
(VMOVAPSZrm addr:$src)>;
def : Pat<(alignedloadv32bf16 addr:$src),
(VMOVAPSZrm addr:$src)>;
def : Pat<(alignedloadv64i8 addr:$src),
(VMOVDQA64Zrm addr:$src)>;
def : Pat<(loadv16i32 addr:$src),
(VMOVDQU64Zrm addr:$src)>;
def : Pat<(loadv32i16 addr:$src),
(VMOVDQU64Zrm addr:$src)>;
def : Pat<(loadv32f16 addr:$src),
(VMOVUPSZrm addr:$src)>;
def : Pat<(loadv32bf16 addr:$src),
(VMOVUPSZrm addr:$src)>;
def : Pat<(loadv64i8 addr:$src),
(VMOVDQU64Zrm addr:$src)>;
// 512-bit store.
def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
(VMOVAPSZmr addr:$dst, VR512:$src)>;
def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
(VMOVAPSZmr addr:$dst, VR512:$src)>;
def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
def : Pat<(store (v16i32 VR512:$src), addr:$dst),
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
def : Pat<(store (v32i16 VR512:$src), addr:$dst),
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
def : Pat<(store (v32f16 VR512:$src), addr:$dst),
(VMOVUPSZmr addr:$dst, VR512:$src)>;
def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
(VMOVUPSZmr addr:$dst, VR512:$src)>;
def : Pat<(store (v64i8 VR512:$src), addr:$dst),
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
let Predicates = [HasVLX] in {
// 128-bit load.
def : Pat<(alignedloadv4i32 addr:$src),
(VMOVDQA64Z128rm addr:$src)>;
def : Pat<(alignedloadv8i16 addr:$src),
(VMOVDQA64Z128rm addr:$src)>;
def : Pat<(alignedloadv8f16 addr:$src),
(VMOVAPSZ128rm addr:$src)>;
def : Pat<(alignedloadv8bf16 addr:$src),
(VMOVAPSZ128rm addr:$src)>;
def : Pat<(alignedloadv16i8 addr:$src),
(VMOVDQA64Z128rm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),
(VMOVDQU64Z128rm addr:$src)>;
def : Pat<(loadv8i16 addr:$src),
(VMOVDQU64Z128rm addr:$src)>;
def : Pat<(loadv8f16 addr:$src),
(VMOVUPSZ128rm addr:$src)>;
def : Pat<(loadv8bf16 addr:$src),
(VMOVUPSZ128rm addr:$src)>;
def : Pat<(loadv16i8 addr:$src),
(VMOVDQU64Z128rm addr:$src)>;
// 128-bit store.
def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
(VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
(VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
(VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
(VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
// 256-bit load.
def : Pat<(alignedloadv8i32 addr:$src),
(VMOVDQA64Z256rm addr:$src)>;
def : Pat<(alignedloadv16i16 addr:$src),
(VMOVDQA64Z256rm addr:$src)>;
def : Pat<(alignedloadv16f16 addr:$src),
(VMOVAPSZ256rm addr:$src)>;
def : Pat<(alignedloadv16bf16 addr:$src),
(VMOVAPSZ256rm addr:$src)>;
def : Pat<(alignedloadv32i8 addr:$src),
(VMOVDQA64Z256rm addr:$src)>;
def : Pat<(loadv8i32 addr:$src),
(VMOVDQU64Z256rm addr:$src)>;
def : Pat<(loadv16i16 addr:$src),
(VMOVDQU64Z256rm addr:$src)>;
def : Pat<(loadv16f16 addr:$src),
(VMOVUPSZ256rm addr:$src)>;
def : Pat<(loadv16bf16 addr:$src),
(VMOVUPSZ256rm addr:$src)>;
def : Pat<(loadv32i8 addr:$src),
(VMOVDQU64Z256rm addr:$src)>;
// 256-bit store.
def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
(VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
(VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
(VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
(VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
let Predicates = [HasBWI] in {
def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
(VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
(VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
def : Pat<(_.info512.VT (vselect VK32WM:$mask,
(_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
def : Pat<(_.info512.VT (vselect VK32WM:$mask,
(_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
def : Pat<(_.info512.VT (vselect VK32WM:$mask,
(_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
def : Pat<(_.info512.VT (vselect VK32WM:$mask,
(_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
(VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
(VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
(VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
def : Pat<(_.info256.VT (vselect VK16WM:$mask,
(_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(_.info256.VT (vselect VK16WM:$mask,
(_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(_.info256.VT (vselect VK16WM:$mask,
(_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(_.info256.VT (vselect VK16WM:$mask,
(_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
(VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
(VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
(VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
def : Pat<(_.info128.VT (vselect VK8WM:$mask,
(_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(_.info128.VT (vselect VK8WM:$mask,
(_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(_.info128.VT (vselect VK8WM:$mask,
(_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(_.info128.VT (vselect VK8WM:$mask,
(_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
(VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}
}
defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector GR32:$src)))]>,
EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector GR64:$src)))]>,
EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>,
EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert GR64:$src))]>,
EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64X:$src))]>,
EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt
// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert GR32:$src))]>,
EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
(iPTR 0)))]>,
EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (extractelt (v4i32 VR128X:$src),
(iPTR 0))), addr:$dst)]>,
EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))]>,
PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
Requires<[HasAVX512]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, PD,
EVEX, VEX_W, Sched<[WriteVecStore]>,
Requires<[HasAVX512, In64BitMode]>;
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
(ins i64mem:$dst, VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
addr:$dst)]>,
EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>,
EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
(VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
let Predicates = [HasAVX512] in {
def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
(VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}
// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
(ins FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32X:$src))]>,
EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt
// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
(VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
(VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
// Conversions between masks and scalar fp.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
(KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
(VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
def : Pat<(v64i1 (bitconvert FR64X:$src)),
(KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
(VMOV64toSDZrr (KMOVQrk VK64:$src))>;
//===----------------------------------------------------------------------===//
// AVX-512 MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
X86VectorVTInfo _, Predicate prd = HasAVX512> {
let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
_.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
let Predicates = [prd] in {
def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
_.ImmAllZerosV)))],
_.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
(_.VT _.RC:$src0))))],
_.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
_.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// _alt version uses FR32/FR64 register class.
let isCodeGenOnly = 1 in
def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
_.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
}
let mayLoad = 1, hasSideEffects = 0 in {
let Constraints = "$src0 = $dst" in
def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|",
"$dst {${mask}}, $src}"),
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src}"),
[], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
}
def mr : AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
EVEX, Sched<[WriteFStore]>;
let mayStore = 1, hasSideEffects = 0 in
def mrk : AVX512PI<0x11, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
NotMemoryFoldable;
}
}
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
HasFP16>,
VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
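// Illustrative Intel-syntax forms accepted by the defs above:
//   vmovss xmm1, xmm2, xmm3                 ; rr (merge low element)
//   vmovss xmm1 {k1}, xmm2, xmm3            ; rrk (merge-masked)
//   vmovss xmm1 {k1} {z}, xmm2, xmm3        ; rrkz (zero-masked)
//   vmovsd xmm1 {k1} {z}, qword ptr [rax]   ; rmkz (masked scalar load)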
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
PatLeaf ZeroFP, X86VectorVTInfo _> {
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
(_.EltVT (X86selects VK1WM:$mask,
(_.EltVT _.FRC:$src1),
(_.EltVT _.FRC:$src2))))))),
(!cast<Instruction>(InstrStr#rrk)
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
VK1WM:$mask,
(_.VT _.RC:$src0),
(_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
(_.EltVT (X86selects VK1WM:$mask,
(_.EltVT _.FRC:$src1),
(_.EltVT ZeroFP))))))),
(!cast<Instruction>(InstrStr#rrkz)
VK1WM:$mask,
(_.VT _.RC:$src0),
(_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
(iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
_.info128.RC:$src)>;
}
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
(iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
_.info128.RC:$src)>;
}
// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked store directly. Codegen will widen a 128-bit masked store to
// 512 bits on AVX512F-only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
AVX512VLVectorVTInfo _,
dag Mask512, dag Mask128,
RegisterClass MaskRC,
SubRegIndex subreg> {
// AVX512F pattern.
def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
(iPTR 0))), addr:$dst, Mask512),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
_.info128.RC:$src)>;
// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
_.info128.RC:$src)>;
}
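// Note: the INSERT_SUBREG/COPY_TO_REGCLASS sequence above widens the mask
// value (e.g. a GR8) to a 32-bit GPR before copying it into VK1WM, since mask
// registers are only directly movable from 32/64-bit GPRs.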
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
_.info512.ImmAllZerosV)),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
addr:$srcAddr)>;
}
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
_.info512.ImmAllZerosV)),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
}
// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked load directly. Codegen will widen a 128-bit masked load to
// 512 bits on AVX512F-only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
AVX512VLVectorVTInfo _,
dag Mask512, dag Mask128,
RegisterClass MaskRC,
SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask512,
_.info512.ImmAllZerosV)),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask512,
(_.info512.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
_.info128.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
(_.info128.VT (X86vzmovl _.info128.RC:$src)))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
}
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
let Predicates = [HasFP16] in {
defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
(v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
(v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
(v32i1 (insert_subvector
(v32i1 immAllZerosV),
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
(iPTR 0))),
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
GR8, sub_8bit>;
defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
(v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
(v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
(v32i1 (insert_subvector
(v32i1 immAllZerosV),
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
(iPTR 0))),
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
GR8, sub_8bit>;
def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
(COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
(v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
(v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
(COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
(v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
}
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (insert_subvector
(v16i1 immAllZerosV),
(v4i1 (extract_subvector
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
(iPTR 0))),
(iPTR 0))),
(v4i1 (extract_subvector
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
(iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
(v8i1
(extract_subvector
(v16i1
(insert_subvector
(v16i1 immAllZerosV),
(v2i1 (extract_subvector
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))),
(iPTR 0))),
(iPTR 0))),
(v2i1 (extract_subvector
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (insert_subvector
(v16i1 immAllZerosV),
(v4i1 (extract_subvector
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
(iPTR 0))),
(iPTR 0))),
(v4i1 (extract_subvector
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
(iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
(v8i1
(extract_subvector
(v16i1
(insert_subvector
(v16i1 immAllZerosV),
(v2i1 (extract_subvector
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))),
(iPTR 0))),
(iPTR 0))),
(v2i1 (extract_subvector
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))), GR8, sub_8bit>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
(v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
(COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
(COPY_TO_REGCLASS
(v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
VK1WM:$mask, addr:$src)),
FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
(COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
(v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
(v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
(v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
(COPY_TO_REGCLASS
(v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
VK1WM:$mask, addr:$src)),
FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
(VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
(VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
(VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
(VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
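// Illustrative: (f32 (X86selects k, a, b)) above is lowered entirely in XMM
// registers: both scalars are copied into VR128X, VMOVSSZrrk/rrkz performs the
// masked merge of the low element, and the result is copied back to FR32X.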
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
let Predicates = [HasFP16] in {
def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, T_MAP5XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSHZrr">,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
VR128X:$src1, VR128X:$src2),
"vmovsh\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[]>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSHZrrk">,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
"vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[]>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSHZrrkz">,
Sched<[SchedWriteFShuffle.XMM]>;
}
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrr">,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[]>, EVEX_K, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrrk">,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[]>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrrkz">,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XD, EVEX_4V, VEX_LIG, VEX_W,
FoldGenData<"VMOVSDZrr">,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
VR128X:$src1, VR128X:$src2),
"vmovsd\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[]>, EVEX_K, XD, EVEX_4V, VEX_LIG,
VEX_W, FoldGenData<"VMOVSDZrrk">,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f64x_info.KRCWM:$mask, VR128X:$src1,
VR128X:$src2),
"vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[]>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
VEX_W, FoldGenData<"VMOVSDZrrkz">,
Sched<[SchedWriteFShuffle.XMM]>;
}
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
(VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
(VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
(VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
(VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
(VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
(VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
VR128X:$src1, VR128X:$src2), 0>;
let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
(VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
(v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
(i8 1))), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
(v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
(i8 3))), sub_xmm)>;
}
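// In the blend patterns above, immediate 1 (0b0001) keeps only the low f32 and
// immediate 3 (0b0011) keeps only the low two words from $src; blending with an
// all-zeros register therefore implements "move low element, clear high bits".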
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(VMOVSSZrm addr:$src)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
(VMOVSDZrm addr:$src)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8f32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// Represent the same patterns above but in the form they appear for
// 512-bit types
def : Pat<(v16f32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v8f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
let Predicates = [HasFP16] in {
def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
(VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
(VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
// FIXME we need better canonicalization in dag combine
def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
(v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
(v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
// FIXME we need better canonicalization in dag combine
def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
(v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
(v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v8f16 (X86vzload16 addr:$src)),
(VMOVSHZrm addr:$src)>;
def : Pat<(v16f16 (X86vzload16 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
def : Pat<(v32f16 (X86vzload16 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst, (v2i64 (X86vzmovl
(v2i64 VR128X:$src))))]>,
EVEX, VEX_W;
}
let Predicates = [HasAVX512] in {
def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
(VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src, sub_8bit)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIZrr GR32:$src)>;
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
(VMOV64toPQIZrr GR64:$src)>;
// AVX 128-bit movd/movq instructions write zeros in the upper bits of the
// register.
def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v8i32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
(VMOVZPQILo2PQIZrr VR128X:$src)>;
def : Pat<(v2i64 (X86vzload64 addr:$src)),
(VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v4i64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v16i32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
def : Pat<(v8i64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(v2f64 (VMOVZPQILo2PQIZrr
(v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(v2i64 (VMOVZPQILo2PQIZrr
(v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v2f64 (VMOVZPQILo2PQIZrr
(v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v2i64 (VMOVZPQILo2PQIZrr
(v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
sub_xmm)>;
}
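// Illustrative: the X86vzmovl patterns above zero-extend via the low xmm only,
// e.g. the v8i64 case extracts sub_xmm, runs VMOVZPQILo2PQIZrr, then uses
// SUBREG_TO_REG to assert that the upper bits of the zmm result are zero.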
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
(ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
(ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
(ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86SchedWriteMoveLS Sched,
PatFrag st_frag = alignednontemporalstore> {
let SchedRW = [Sched.MR], AddedComplexity = 400 in
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(st_frag (_.VT _.RC:$src), addr:$dst)],
_.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
X86SchedWriteMoveLSWidths Sched> {
let Predicates = [HasAVX512] in
defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
}
}
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
SchedWriteFMoveLSNT>, PS;
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(v8f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v16f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v8i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v16i32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v32i16 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v64i8 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
}
let Predicates = [HasVLX], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(v4f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v8i32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v16i16 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v32i8 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(v2f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v4f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v4i32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v8i16 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v16i8 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
}
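// Note: VMOVNTDQA is the only non-temporal vector load, so the patterns above
// map every element type onto it; similarly, all integer non-temporal stores
// reuse VMOVNTDQ regardless of element width.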
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable = 0> :
avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
(_.BroadcastLdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
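// Illustrative: for the i32 ZMM instantiation the rmb form assembles as
//   vpaddd zmm1 {k1}, zmm2, dword ptr [rax]{1to16}
// i.e. one dword is loaded and broadcast to all sixteen lanes before the op.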
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
sched.YMM, IsCommutable>, EVEX_V256;
defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
sched.XMM, IsCommutable>, EVEX_V128;
}
}
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
sched.YMM, IsCommutable>, EVEX_V256;
defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
sched.XMM, IsCommutable>, EVEX_V128;
}
}
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
sched, prd, IsCommutable>,
VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
VEX_WIG;
}
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
VEX_WIG;
}
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched,
Predicate prd, bit IsCommutable = 0> {
defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
IsCommutable>;
defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
IsCommutable>;
}
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched,
Predicate prd, bit IsCommutable = 0> {
defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
IsCommutable>;
defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
IsCommutable>;
}
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
bits<8> opc_d, bits<8> opc_q,
string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
sched, HasAVX512, IsCommutable>,
avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
sched, HasBWI, IsCommutable>;
}
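// Naming convention for the wrappers above: the _b/_w/_d/_q suffixes fix the
// element width at 8/16/32/64 bits (via the avx512vl_i*_info bundles and the
// matching EVEX_CD8 tuple), _dq and _bw pair the widths that share a
// predicate (HasAVX512 vs. HasBWI), and _all instantiates all four.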
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
SDNode OpNode, X86VectorVTInfo _Src,
X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1","$src1, $src2",
(_Dst.VT (OpNode
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
IsCommutable>,
AVX512BIBase, EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(_Src.LdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"#_Brdct.BroadcastStr#", $src1",
"$src1, ${src2}"#_Brdct.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
SchedWriteVecIMul, HasDQI, 1>, T8PD,
NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
SchedWriteVecIMul, HasAVX512, 1>;
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _SrcVTInfo,
AVX512VLVectorVTInfo _DstVTInfo,
SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
_SrcVTInfo.info512, _DstVTInfo.info512,
v8i64_info, IsCommutable>,
EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
let Predicates = [HasVLX, prd] in {
defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
_SrcVTInfo.info256, _DstVTInfo.info256,
v4i64x_info, IsCommutable>,
EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
_SrcVTInfo.info128, _DstVTInfo.info128,
v2i64x_info, IsCommutable>,
EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
}
}
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
avx512vl_i8_info, avx512vl_i8_info,
X86multishift, HasVBMI, 0>, T8PD;
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
X86FoldableSchedWrite sched> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"#_Src.BroadcastStr#", $src1",
"$src1, ${src2}"#_Src.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _Src,
X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1","$src1, $src2",
(_Dst.VT (OpNode
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
IsCommutable, IsCommutable>,
EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(_Src.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
v32i16_info, SchedWriteShuffle.ZMM>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
v16i16x_info, SchedWriteShuffle.YMM>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
v16i16x_info, SchedWriteShuffle.YMM>,
EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
v8i16x_info, SchedWriteShuffle.XMM>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
v8i16x_info, SchedWriteShuffle.XMM>,
EVEX_V128;
}
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
v32i8x_info, SchedWriteShuffle.YMM>,
EVEX_V256, VEX_WIG;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
v16i8x_info, SchedWriteShuffle.XMM>,
EVEX_V128, VEX_WIG;
}
}
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo _Src,
AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
_Dst.info512, SchedWriteVecIMul.ZMM,
IsCommutable>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
_Dst.info256, SchedWriteVecIMul.YMM,
IsCommutable>, EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
_Dst.info128, SchedWriteVecIMul.XMM,
IsCommutable>, EVEX_V128;
}
}
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
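// The pack instructions narrow i32->i16 (VPACKSSDW/VPACKUSDW) and i16->i8
// (VPACKSSWB/VPACKUSWB) with signed/unsigned saturation, and the multiply-add
// forms widen i8->i16 (VPMADDUBSW) and i16->i32 (VPMADDWD), which is why the
// _Src and _Dst type bundles passed to these multiclasses differ in width.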
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
SchedWriteVecALU, HasAVX512, 1>, T8PD,
NotEVEX2VEXConvertible;
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
SchedWriteVecALU, HasAVX512, 1>, T8PD,
NotEVEX2VEXConvertible;
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
SchedWriteVecALU, HasAVX512, 1>, T8PD,
NotEVEX2VEXConvertible;
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
SchedWriteVecALU, HasAVX512, 1>, T8PD,
NotEVEX2VEXConvertible;
// PMULLQ: Use the 512-bit version to implement 128/256-bit operations in case NoVLX.
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG
(VPMULLQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
(EXTRACT_SUBREG
(VPMULLQZrmb
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
addr:$src2),
sub_ymm)>;
def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG
(VPMULLQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
(EXTRACT_SUBREG
(VPMULLQZrmb
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
addr:$src2),
sub_xmm)>;
}
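// Each pattern above widens the 128/256-bit operands into a 512-bit register
// with INSERT_SUBREG over an IMPLICIT_DEF, runs the ZMM form of VPMULLQ, and
// extracts the original subregister again; the same NoVLX fallback is reused
// for the 64-bit min/max operations below.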
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
(EXTRACT_SUBREG
(!cast<Instruction>(Instr#"rr")
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
(EXTRACT_SUBREG
(!cast<Instruction>(Instr#"rmb")
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
addr:$src2),
sub_ymm)>;
def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
(EXTRACT_SUBREG
(!cast<Instruction>(Instr#"rr")
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
(EXTRACT_SUBREG
(!cast<Instruction>(Instr#"rmb")
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
addr:$src2),
sub_xmm)>;
}
let Predicates = [HasAVX512, NoVLX] in {
defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
SchedWriteVecLogic, HasAVX512>;
let Predicates = [HasVLX] in {
def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
(VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
(VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
(VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
(VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
(VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
(VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
(VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
(VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
(VPANDQZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
(VPANDQZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
(VPORQZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
(VPORQZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
(VPXORQZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
(VPXORQZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
(VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
(VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
(VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
(VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
(VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
(VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
(VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
(VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
(VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
(VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
(VPANDQZ256rm VR256X:$src1, addr:$src2)>;
def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
(VPANDQZ256rm VR256X:$src1, addr:$src2)>;
def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
(VPORQZ256rm VR256X:$src1, addr:$src2)>;
def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
(VPORQZ256rm VR256X:$src1, addr:$src2)>;
def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
(VPXORQZ256rm VR256X:$src1, addr:$src2)>;
def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
(VPXORQZ256rm VR256X:$src1, addr:$src2)>;
def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
(VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
(VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
(VPANDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
(VPANDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
(VPORQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
(VPORQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
(VPXORQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
(VPXORQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
(VPANDNQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
(VPANDNQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
(VPANDQZrm VR512:$src1, addr:$src2)>;
def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
(VPANDQZrm VR512:$src1, addr:$src2)>;
def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
(VPORQZrm VR512:$src1, addr:$src2)>;
def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
(VPORQZrm VR512:$src1, addr:$src2)>;
def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
(VPXORQZrm VR512:$src1, addr:$src2)>;
def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
(VPXORQZrm VR512:$src1, addr:$src2)>;
def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
(VPANDNQZrm VR512:$src1, addr:$src2)>;
def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
(VPANDNQZrm VR512:$src1, addr:$src2)>;
}
// Patterns to catch vselect with a different type than the logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
X86VectorVTInfo _,
X86VectorVTInfo IntInfo> {
// Masked register-register logical operations.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
_.RC:$src2)>;
// Masked register-memory logical operations.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
addr:$src2)>;
}
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
X86VectorVTInfo _,
X86VectorVTInfo IntInfo> {
// Register-broadcast logical operations.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert
(IntInfo.VT (OpNode _.RC:$src1,
(IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert
(IntInfo.VT (OpNode _.RC:$src1,
(IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
}
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo SelectInfo,
AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
IntInfo.info128>;
defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
IntInfo.info512>;
}
}
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo SelectInfo,
AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
SelectInfo.info128, IntInfo.info128>;
defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
SelectInfo.info512, IntInfo.info512>;
}
}
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
// i64 vselect with i32/i16/i8 logic op
defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
avx512vl_i32_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
avx512vl_i16_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
avx512vl_i8_info>;
// i32 vselect with i64/i16/i8 logic op
defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
avx512vl_i64_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
avx512vl_i16_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
avx512vl_i8_info>;
// f32 vselect with i64/i32/i16/i8 logic op
defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
avx512vl_i64_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
avx512vl_i32_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
avx512vl_i16_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
avx512vl_i8_info>;
// f64 vselect with i64/i32/i16/i8 logic op
defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
avx512vl_i64_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
avx512vl_i32_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
avx512vl_i16_info>;
defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
avx512vl_i8_info>;
defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
avx512vl_f32_info,
avx512vl_i32_info>;
defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
avx512vl_f64_info,
avx512vl_i64_info>;
}
defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDPatternOperator OpNode, SDNode VecNode,
X86FoldableSchedWrite sched, bit IsCommutable> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
Sched<[sched]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
(_.ScalarIntMemFrags addr:$src2)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
Sched<[sched]> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode VecNode, X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$rc))>,
EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, SDNode SaeNode,
X86FoldableSchedWrite sched, bit IsCommutable,
string EVEX2VexOvrd> {
let ExeDomain = _.ExeDomain in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
Sched<[sched]>, SIMD_EXC;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
(_.ScalarIntMemFrags addr:$src2)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, Predicates = [HasAVX512],
Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
Sched<[sched]>,
EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
}
let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
EVEX_B, Sched<[sched]>;
}
}
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode VecNode, SDNode RndNode,
X86SchedWriteSizes sched, bit IsCommutable> {
defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
sched.PS.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
sched.PS.Scl>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
sched.PD.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
sched.PD.Scl>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasFP16] in
defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
VecNode, sched.PH.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
sched.PH.Scl>,
T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode VecNode, SDNode SaeNode,
X86SchedWriteSizes sched, bit IsCommutable> {
defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
VecNode, SaeNode, sched.PS.Scl, IsCommutable,
NAME#"SS">,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
VecNode, SaeNode, sched.PD.Scl, IsCommutable,
NAME#"SD">,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasFP16] in {
defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
VecNode, SaeNode, sched.PH.Scl, IsCommutable,
NAME#"SH">,
T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
NotEVEX2VEXConvertible;
}
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
SchedWriteFCmpSizes, 0>;
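// For these scalar ops the *_round variants add a register form with an
// explicit rounding-control operand ($rc), while the *_sae variants used for
// VMIN/VMAX add a {sae} form instead, since min/max take no rounding control.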
// MIN/MAX nodes are commutable under "unsafe-fp-math". In that case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, SDNode OpNode,
X86FoldableSchedWrite sched,
string EVEX2VEXOvrd> {
let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
let isCommutable = 1;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
SchedWriteFCmp.Scl, "VMINCSS">, XS,
EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
SchedWriteFCmp.Scl, "VMINCSD">, XD,
VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
SchedWriteFCmp.Scl, "VMAXCSS">, XS,
EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
SchedWriteFCmp.Scl, "VMAXCSD">, XD,
VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
NotEVEX2VEXConvertible;
defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
NotEVEX2VEXConvertible;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable,
bit IsKCommutable = IsCommutable,
string suffix = _.Suffix,
string ClobberConstraint = "",
bit MayRaiseFPException = 1> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
let mayLoad = 1 in {
defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
(MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
(MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNodeRnd,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string suffix = _.Suffix,
string ClobberConstraint = ""> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
0, 0, 0, vselect_mask, ClobberConstraint>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNodeSAE,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
EVEX_4V, EVEX_B, Sched<[sched]>;
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode,
Predicate prd, X86SchedWriteSizes sched,
bit IsCommutable = 0,
bit IsPD128Commutable = IsCommutable> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if the AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
sched.PD.XMM, IsPD128Commutable,
IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode,
X86SchedWriteSizes sched, bit IsCommutable = 0> {
let Predicates = [HasFP16] in {
defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
EVEX_CD8<16, CD8VF>;
}
let Predicates = [HasVLX, HasFP16] in {
defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
EVEX_CD8<16, CD8VF>;
defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
EVEX_CD8<16, CD8VF>;
}
}
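// avx512_fp_binop_p instantiates the packed PS/PD forms at 512 bits and,
// under HasVLX, at 256/128 bits; avx512_fp_binop_ph adds the FP16 PH forms
// under HasFP16. The *_round and *_sae multiclasses below supply the 512-bit
// embedded-rounding and suppress-all-exceptions encodings.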
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86SchedWriteSizes sched> {
let Predicates = [HasFP16] in {
defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
v32f16_info>,
EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
}
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
v8f64_info>,
EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86SchedWriteSizes sched> {
let Predicates = [HasFP16] in {
defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
v32f16_info>,
EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
}
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
v8f64_info>,
EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
SchedWriteFAddSizes, 1>,
avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
SchedWriteFMulSizes, 1>,
avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
SchedWriteFAddSizes>,
avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
SchedWriteFDivSizes>,
avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
SchedWriteFCmpSizes, 0>,
avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
SchedWriteFCmpSizes, 0>,
avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
SchedWriteFCmpSizes, 1>,
avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
SchedWriteFCmpSizes, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
SchedWriteFCmpSizes, 1>,
avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
SchedWriteFCmpSizes, 1>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
}
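// The FP logic forms carry null_frag, so no selection patterns are attached
// here; Uses and mayRaiseFPException are cleared because these bitwise
// operations neither read nor update MXCSR.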
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
EVEX_4V, Sched<[sched]>;
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
Sched<[sched]>;
defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
}
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
X86scalefsRnd, sched.Scl>,
EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
X86scalefsRnd, sched.Scl>,
EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
// Define only if the AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
}
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
SchedWriteFAdd>, NotEVEX2VEXConvertible;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
// NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
// There are just too many permutations due to commutability and bitcasts.
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(null_frag), (null_frag), 1>,
EVEX_4V, Sched<[sched]>;
let mayLoad = 1 in
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(null_frag), (null_frag)>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(null_frag), (null_frag)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
}
}
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
avx512vl_i32_info>;
defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
avx512vl_i64_info>, VEX_W;
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in {
defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
v32i16_info>, EVEX_V512, VEX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
v64i8_info>, EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
v16i16x_info>, EVEX_V256, VEX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
v8i16x_info>, EVEX_V128, VEX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
v32i8x_info>, EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
v16i8x_info>, EVEX_V128;
}
}
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
X86SchedWriteWidths sched> :
avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
SchedWriteVecLogic>, T8XS;
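// VPTESTM and VPTESTNM share the 0x26/0x27 opcodes and differ only in their
// prefix (T8PD vs. T8XS); as noted above, their selection patterns live in
// X86ISelDAGToDAG rather than in this file.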
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
Sched<[sched]>;
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
(i8 timm:$src2)))>,
Sched<[sched.Folded]>;
}
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
(_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
EVEX_B, Sched<[sched.Folded]>;
}
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, ValueType SrcVT,
X86VectorVTInfo _> {
// src2 is always 128-bit
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
AVX512BIBase, EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
AVX512BIBase,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, ValueType SrcVT,
AVX512VLVectorVTInfo VTInfo,
Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
VTInfo.info512>, EVEX_V512,
EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
VTInfo.info256>, EVEX_V256,
EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
VTInfo.info128>, EVEX_V128,
EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
}
}
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched,
bit NotEVEX2VEXConvertibleQ = 0> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
avx512vl_i32_info, HasAVX512>;
let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
avx512vl_i64_info, HasAVX512>, VEX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
avx512vl_i16_info, HasBWI>;
}
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.ZMM, VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.YMM, VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
VTInfo.info256>, EVEX_V256;
defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.XMM, VTInfo.info128>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
VTInfo.info128>, EVEX_V128;
}
}
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in
defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
}
}
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched,
bit NotEVEX2VEXConvertibleQ = 0> {
defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
SchedWriteVecShiftImm>,
avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
SchedWriteVecShiftImm>,
avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
SchedWriteVecShiftImm, 1>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
SchedWriteVecShift>;
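// The quadword forms of VPSRA are marked not EVEX-to-VEX convertible above
// because VPSRAQ has no VEX-encoded counterpart, so those encodings can never
// be compressed from EVEX to VEX.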
// Use the 512-bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
VR128X:$src2)), sub_ymm)>;
def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
VR128X:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
timm:$src2)), sub_ymm)>;
def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
timm:$src2)), sub_xmm)>;
}
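// As with VPMULLQ earlier, v2i64/v4i64 arithmetic right shifts exist only as
// EVEX instructions, so without VLX they are emulated through the 512-bit
// VPSRAQZ forms via subregister insert/extract.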
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
AVX5128IBase, EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(_.VT (_.LdFrag addr:$src2))))>,
AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
}
}
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched> {
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
avx512vl_i32_info>;
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
avx512vl_i64_info>, VEX_W;
}
// Use the 512-bit version to implement 128/256-bit operations in case NoVLX.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
SDNode OpNode, list<Predicate> p> {
let Predicates = p in {
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
(_.info256.VT _.info256.RC:$src2))),
(EXTRACT_SUBREG
(!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
(_.info128.VT _.info128.RC:$src2))),
(EXTRACT_SUBREG
(!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
}
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in
defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
EVEX_V512, VEX_W;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
EVEX_V256, VEX_W;
defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
EVEX_V128, VEX_W;
}
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
// Use the 512-bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
sub_xmm)>;
def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>;
def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
sub_xmm)>;
def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>;
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
timm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
timm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
timm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
timm:$src2)), sub_ymm)>;
}
// Use the 512-bit VPRORV/VPROR versions to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is not available (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
sub_xmm)>;
def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>;
def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
sub_xmm)>;
def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>;
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
timm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
timm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
timm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
timm:$src2)), sub_ymm)>;
}
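// Illustrative sketch (comments only): packed rotates exist only as AVX512
// instructions, so with NoVLX e.g.
//   %r = rotl v4i32 %a, %b
// cannot use a 128-bit encoding. The patterns above widen it instead: the
// operands are inserted into the low 128 bits of zmm registers, VPROLVDZrr
// rotates all sixteen dwords, and sub_xmm extracts the four lanes that
// matter. The immediate forms (VPROLDZri etc.) are handled the same way.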
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/B/D/Q/PS/PD
//===-------------------------------------------------------------------===//
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched, VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
sched, VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
sched, VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
sched, VTInfo.info256>, EVEX_V256;
}
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
Predicate prd, SDNode OpNode,
X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
let Predicates = [prd] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
EVEX_V512 ;
let Predicates = [HasVLX, prd] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
EVEX_V256 ;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
EVEX_V128 ;
}
}
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
WriteVarShuffle256, avx512vl_i8_info>;
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
X86VPermi, WriteShuffle256, avx512vl_i64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
X86VectorVTInfo Ctrl> {
defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(Ctrl.VT Ctrl.RC:$src2)))>,
T8PD, EVEX_4V, Sched<[sched]>;
defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _,
AVX512VLVectorVTInfo Ctrl> {
let Predicates = [HasAVX512] in {
defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
_.info512, Ctrl.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
_.info128, Ctrl.info128>, EVEX_V128;
defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
_.info256, Ctrl.info256>, EVEX_V256;
}
}
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
_, Ctrl>;
defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
X86VPermilpi, SchedWriteFShuffle, _>,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
avx512vl_i64_info>, VEX_W1X;
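// Illustrative semantics sketch (comments only) for the variable form:
//   vpermilps %xmm2, %xmm1, %xmm0
// selects, within each 128-bit lane, one of the four floats of %xmm1 using
// the low two bits of each dword of the control %xmm2:
//   dst[i] = src[ctrl[i] & 3]    (per 128-bit lane)
// The immediate form packs the same four 2-bit selectors into an imm8.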
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
X86PShufhw, SchedWriteShuffle>,
EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw, SchedWriteShuffle>,
EVEX, AVX512XDIi8Base;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
EVEX_V512;
let Predicates = [HasVLX, HasBWI] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
EVEX_V256;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
EVEX_V128;
}
}
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
SchedWriteVarShuffle>, VEX_WIG;
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86VectorVTInfo _> {
let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(OpNode _.RC:$src1,
(_.VT (bitconvert
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}
// No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created
// in SSE1; the MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
let Predicates = [HasAVX512] in {
// VMOVHPD patterns
def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPD patterns
def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
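// Illustrative semantics sketch (comments only):
//   vmovhpd (%rax), %xmm1, %xmm0  ; dst[63:0] = xmm1[63:0], dst[127:64] = mem64
//   vmovlpd (%rax), %xmm1, %xmm0  ; dst[63:0] = mem64, dst[127:64] = xmm1[127:64]
// which is why X86Unpckl (interleave low qwords) models the vmovhpd load and
// X86Movsd (replace element 0) models the vmovlpd load above.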
let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhps\t{$src, $dst|$dst, $src}",
[]>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
(iPTR 0))), addr:$dst)]>,
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlps\t{$src, $dst|$dst, $src}",
[]>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128X:$src),
(iPTR 0))), addr:$dst)]>,
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
let Predicates = [HasAVX512] in {
// VMOVHPD patterns
def : Pat<(store (f64 (extractelt
(v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
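// Note: (X86VPermilpi $src, 1) swaps the two doubles, so element 0 of its
// result is the high double of $src - exactly the value vmovhpd stores.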
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
(MaskOpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _,
Predicate prd = HasAVX512> {
let Predicates = [prd] in {
defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.ZMM, _.info512>,
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd> {
defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f64_info>, T8PD, VEX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
fma, X86FmaddRnd>;
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(null_frag),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
"$src2, ${src3}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(null_frag),
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _,
Predicate prd = HasAVX512> {
let Predicates = [prd] in {
defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.ZMM, _.info512>,
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd > {
defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f64_info>, T8PD, VEX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
fma, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(null_frag),
(_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
EVEX_4V, Sched<[sched]>;
// The pattern is in 312 order so that the load is in a different place from
// the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// The pattern is in 312 order so that the load is in a different place from
// the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
"$src2, ${src3}"#_.BroadcastStr,
(_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2)), 1, 0>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(null_frag),
(_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _,
Predicate prd = HasAVX512> {
let Predicates = [prd] in {
defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.ZMM, _.info512>,
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd > {
defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
avx512vl_f64_info>, T8PD, VEX_W;
}
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
fma, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
X86Fnmsub, X86FnmsubRnd>;
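// Illustrative summary (comments only) of the FMA operand-order suffixes,
// where $dst is tied to the first source ($src1):
//   vfmadd132: dst = dst  * src3 + src2
//   vfmadd213: dst = src2 * dst  + src3
//   vfmadd231: dst = src2 * src3 + dst
// Three forms exist so that whichever operand lives in memory can always be
// placed in the foldable src3 slot.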
// Scalar FMA
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
let mayLoad = 1 in
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
let isCodeGenOnly = 1, isCommutable = 1 in {
def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
let Uses = [MXCSR] in
def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
!strconcat(OpcodeStr,
"\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
!if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
}// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
X86VectorVTInfo _, string SUFF> {
let ExeDomain = _.ExeDomain in {
defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
// Operands for the intrinsic are in 123 order to preserve passthru
// semantics.
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
_.FRC:$src3))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
(_.ScalarLdFrag addr:$src3)))),
(set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
_.FRC:$src3, (i32 timm:$rc)))), 0>;
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
_.FRC:$src1))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
(_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
(set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
_.FRC:$src1, (i32 timm:$rc)))), 1>;
// One pattern is in 312 order so that the load is in a different place from
// the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
_.FRC:$src2))),
(set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
_.FRC:$src1, _.FRC:$src2))),
(set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
_.FRC:$src2, (i32 timm:$rc)))), 1>;
}
}
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f32x_info, "SS">,
EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f64x_info, "SD">,
EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
}
let Predicates = [HasFP16] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f16x_info, "SH">,
EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
}
}
defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
SDNode RndOp, string Prefix,
string Suffix, SDNode Move,
X86VectorVTInfo _, PatLeaf ZeroFP,
Predicate prd = HasAVX512> {
let Predicates = [prd] in {
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(Op _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3))))),
(!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(Op _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(Op _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
(!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3), _.FRC:$src2),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
// Patterns with rounding mode.
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
}
}
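// Illustrative note (comments only): the patterns above match a scalar FMA
// feeding an element-0 blend, e.g.
//   (X86Movss $dst, (scalar_to_vector (fma $a, elt0($dst), $b)))
// and select the corresponding *_Int instruction directly, so no separate
// vmovss/vmovsd is emitted just to merge the upper elements.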
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
"SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
"SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
"SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
"SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
"SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
"SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
"SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
"SD", X86Movsd, v2f64x_info, fp64imm0>;
//===----------------------------------------------------------------------===//
// AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the
// Low/High 52 Bits of the Product
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
// NOTE: The SDNode has the multiply operands first with the add last.
// This enables commuted load patterns to be autogenerated by tablegen.
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
T8PD, EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)>,
T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
let Predicates = [HasIFMA] in {
defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasIFMA] in {
defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
SchedWriteVecIMul, avx512vl_i64_info>,
VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
SchedWriteVecIMul, avx512vl_i64_info>,
VEX_W;
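// Illustrative semantics sketch (comments only), per 64-bit lane:
//   vpmadd52luq: dst += zext64( (src2[51:0] * src3[51:0])[ 51:  0] )
//   vpmadd52huq: dst += zext64( (src2[51:0] * src3[51:0])[103: 52] )
// i.e. a 52x52->104-bit unsigned multiply whose low or high 52 bits are
// accumulated into the destination qword.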
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
RegisterClass SrcRC, X86VectorVTInfo DstVT,
X86MemOperand x86memop, PatFrag ld_frag, string asm,
string mem, list<Register> _Uses = [MXCSR],
bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
mayRaiseFPException = _mayRaiseFPException in {
let hasSideEffects = 0, isCodeGenOnly = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2),
!strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, x86memop:$src2),
asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2)))]>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
X86FoldableSchedWrite sched, RegisterClass SrcRC,
X86VectorVTInfo DstVT, string asm,
string mem> {
let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
!strconcat(asm,
"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
SrcRC:$src2,
(i32 timm:$rc)))]>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
(!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
X86FoldableSchedWrite sched,
RegisterClass SrcRC, X86VectorVTInfo DstVT,
X86MemOperand x86memop, PatFrag ld_frag,
string asm, string mem> {
defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
ld_frag, asm, mem>, VEX_LIG;
}
let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SS, GR32,
v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SS, GR64,
v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SD, GR64,
v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp GR32:$src)),
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
(VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SS, GR32,
v4f32x_info, i32mem, loadi32,
"cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SS, GR64,
v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SD, GR64,
v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp GR32:$src)),
(VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
(VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
(VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
(VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
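// Note on the "l"/"q" mem strings threaded through the classes above: the
// memory forms have no register operand that implies the integer size, so
// AT&T syntax needs an explicit suffix, e.g.
//   vcvtsi2ssl (%rax), %xmm1, %xmm0   # 32-bit signed source
//   vcvtsi2ssq (%rax), %xmm1, %xmm0   # 64-bit signed source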
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr, Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
let Uses = [MXCSR] in
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC,
Sched<[sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
(SrcVT.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [prd]
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
(!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
SrcVT.IntScalarMemOp:$src), 0, "att">;
}
// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
X86FoldableSchedWrite sched> {
let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
} // Predicates = [HasAVX512]
}
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}
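// Note: lrint/llrint lower directly to cvtss2si/cvtsd2si because both round
// according to the current MXCSR rounding mode, which matches the libm
// semantics of lrint.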
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
// which would otherwise produce unnecessary vmovs{s,d} instructions.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
(VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
(VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
(VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
(VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
(VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
(VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
(VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
(VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
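// Illustrative motivation (comments only): clang lowers e.g.
//   _mm_cvtsi32_ss(a, b)
// to a scalar conversion followed by an element-0 blend (X86Movss). The
// patterns above fold that pair into the single *_Int conversion, so the
// blend never becomes a separate vmovss.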
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
SDNode OpNodeInt, SDNode OpNodeSAE,
X86FoldableSchedWrite sched, string aliasStr,
Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
let Uses = [MXCSR] in
def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
[(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst,
(OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [prd]
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
(!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
_SrcRC.IntScalarMemOp:$src), 0, "att">;
}
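// The instantiations below cover the truncating scalar conversions
// (vcvttss2si, vcvttsd2si, vcvttss2usi, vcvttsd2usi), each with 32-bit and
// 64-bit destinations. The aliasStr argument ("{l}"/"{q}") supplies the AT&T
// operand-size suffix used by the InstAlias definitions in the multiclass.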
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
"{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
"{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
X86FoldableSchedWrite sched> {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2)))>,
EVEX_4V, VEX_LIG, Sched<[sched]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.ScalarIntMemFrags addr:$src2)))>,
EVEX_4V, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX_4V, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeSAE (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2)))>,
EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd,
X86FoldableSchedWrite sched> {
let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
EVEX_4V, VEX_LIG, Sched<[sched]>,
EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd,
X86FoldableSchedWrite sched,
X86VectorVTInfo _src, X86VectorVTInfo _dst,
Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
}
}
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeSAE,
X86FoldableSchedWrite sched,
X86VectorVTInfo _src, X86VectorVTInfo _dst,
Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
EVEX_CD8<_src.EltSize, CD8VT1>;
}
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
f32x_info>, XD, VEX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f32x_info,
f64x_info>, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
f16x_info, HasFP16>, T_MAP5XD, VEX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f16x_info,
f64x_info, HasFP16>, T_MAP5XS;
defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f32x_info,
f16x_info, HasFP16>, T_MAP5PS;
defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f16x_info,
f32x_info, HasFP16>, T_MAP6PS;
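// The patterns below map the generic scalar fpextend/fpround nodes onto these
// conversions; IMPLICIT_DEF fills the otherwise-unused pass-through source
// operand of the register forms.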
def : Pat<(f64 (any_fpextend FR32X:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
def : Pat<(f32 (any_fpround FR64X:$src)),
(VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f32 (any_fpextend FR16X:$src)),
(VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
Requires<[HasFP16]>;
def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
(VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasFP16, OptForSize]>;
def : Pat<(f64 (any_fpextend FR16X:$src)),
(VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
Requires<[HasFP16]>;
def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
(VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasFP16, OptForSize]>;
def : Pat<(f16 (any_fpround FR32X:$src)),
(VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
Requires<[HasFP16]>;
def : Pat<(f16 (any_fpround FR64X:$src)),
(VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasFP16]>;
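// When the converted scalar is merged back into the low element of an
// existing vector (the X86Movss/X86Movsd idiom), select the _Int forms so the
// upper elements of $dst are preserved.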
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
(f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
(f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
RegisterClass MaskRC = _.KRCWM,
dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
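// The rr/rm/rmb forms below cover register, memory and broadcast-memory
// (EVEX_B) operands; AVX512_maskable_cvt expands each into unmasked,
// merge-masked (pass-through in $src0) and zero-masked variants. The
// LdDAG/MaskLdDAG parameters let callers override the memory pattern, e.g.
// when only part of the source vector is loaded.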
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src),
(ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
(ins MaskRC:$mask, _Src.RC:$src),
OpcodeStr, "$src", "$src",
(_.VT (OpNode (_Src.VT _Src.RC:$src))),
(vselect_mask MaskRC:$mask,
(_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
_.RC:$src0),
(vselect_mask MaskRC:$mask,
(_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
_.ImmAllZerosV)>,
EVEX, Sched<[sched]>;
defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins MemOp:$src),
(ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
(ins MaskRC:$mask, MemOp:$src),
OpcodeStr#Alias, "$src", "$src",
LdDAG,
(vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
(vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
EVEX, Sched<[sched.Folded]>;
defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _Src.ScalarMemOp:$src),
(ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
(ins MaskRC:$mask, _Src.ScalarMemOp:$src),
OpcodeStr,
"${src}"#Broadcast, "${src}"#Broadcast,
(_.VT (OpNode (_Src.VT
(_Src.BroadcastLdFrag addr:$src))
)),
(vselect_mask MaskRC:$mask,
(_.VT
(MaskOpNode
(_Src.VT
(_Src.BroadcastLdFrag addr:$src)))),
_.RC:$src0),
(vselect_mask MaskRC:$mask,
(_.VT
(MaskOpNode
(_Src.VT
(_Src.BroadcastLdFrag addr:$src)))),
_.ImmAllZerosV)>,
EVEX, EVEX_B, Sched<[sched.Folded]>;
}
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
let Uses = [MXCSR] in
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
(_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
EVEX, EVEX_B, Sched<[sched]>;
}
// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
X86FoldableSchedWrite sched> {
let Uses = [MXCSR] in
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
"$rc, $src", "$src, $rc",
(_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDPatternOperator OpNode,
SDNode MaskOpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
RegisterClass MaskRC = _.KRCWM>
: avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
Alias, MemOp, MaskRC,
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
// Extend [Float to Double, Half to Float]
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
let Predicates = [prd] in {
defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
any_fpextend, fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
X86any_vfpext, X86vfpext, sched.XMM,
_dst.info128.BroadcastStr,
"", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
any_fpextend, fpextend, sched.YMM>, EVEX_V256;
}
}
// Truncate [Double to Float, Float to Half]
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
X86SchedWriteWidths sched, Predicate prd = HasAVX512,
PatFrag bcast128 = _src.info128.BroadcastLdFrag,
PatFrag loadVT128 = _src.info128.LdFrag,
RegisterClass maskRC128 = _src.info128.KRCWM> {
let Predicates = [prd] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
X86any_vfpround, X86vfpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
null_frag, null_frag, sched.XMM,
_src.info128.BroadcastStr, "{x}",
f128mem, maskRC128>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
X86any_vfpround, X86vfpround,
sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
// Special patterns to allow use of X86vmfpround for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
(!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
maskRC128:$mask),
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
maskRC128:$mask),
(!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
(!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
maskRC128:$mask),
(!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
maskRC128:$mask),
(!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
(!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
(_dst.info128.VT VR128X:$src0), maskRC128:$mask),
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
_dst.info128.ImmAllZerosV, maskRC128:$mask),
(!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
}
defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
PS, EVEX_CD8<32, CD8VH>;
// Extend Half to Double
multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
any_fpextend, fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
def : Pat<(v8f64 (extloadv8f16 addr:$src)),
(!cast<Instruction>(NAME # "Zrm") addr:$src)>;
}
let Predicates = [HasFP16, HasVLX] in {
defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
f32mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
f64mem>, EVEX_V256;
}
}
// Truncate Double to Half
multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasFP16, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
VK4WM>, EVEX_V256;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
"$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
VR512:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
"$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
VK8WM:$mask, VR512:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
VK8WM:$mask, VR512:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
(!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to8}}",
(!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
VK8WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to8}}",
(!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
VK8WM:$mask, i64mem:$src), 0, "att">;
}
defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
avx512vl_f32_info, SchedWriteCvtPD2PS,
HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
avx512vl_f16_info, SchedWriteCvtPS2PD,
HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
T_MAP5PS, EVEX_CD8<16, CD8VQ>;
let Predicates = [HasFP16, HasVLX] in {
// Special patterns to allow use of X86vmfpround for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
(VCVTPD2PHZ256rr VR256X:$src)>;
def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
VK4WM:$mask)),
(VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
VK4WM:$mask),
(VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
(VCVTPD2PHZ256rm addr:$src)>;
def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
VK4WM:$mask),
(VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
VK4WM:$mask),
(VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2PHZ256rmb addr:$src)>;
def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
(v8f16 VR128X:$src0), VK4WM:$mask),
(VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
v8f16x_info.ImmAllZerosV, VK4WM:$mask),
(VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
(VCVTPD2PHZ128rr VR128X:$src)>;
def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
(VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
(VCVTPD2PHZ128rm addr:$src)>;
def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
(VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2PHZ128rmb addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
(v8f16 VR128X:$src0), VK2WM:$mask),
(VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
v8f16x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDPatternOperator OpNode128,
SDNode MaskOpNode128,
X86SchedWriteWidths sched> {
// No rounding in this op
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
MaskOpNode, sched.ZMM>, EVEX_V512;
let Predicates = [HasVLX] in {
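// Only the low two doubleword elements feed the 128-bit form, so its memory
// pattern loads a single i64 via scalar_to_vector and the broadcast string is
// "{1to2}".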
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
"", i64mem, VK2WM,
(v2f64 (OpNode128 (bc_v4i32
(v2i64
(scalar_to_vector (loadi64 addr:$src)))))),
(v2f64 (MaskOpNode128 (bc_v4i32
(v2i64
(scalar_to_vector (loadi64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeSAE,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parcer. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
}
}
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
// Explicitly specify the broadcast string, since we take only 2 elements
// from the v4f32x_info source.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
(v2i64 (OpNode (bc_v4f32
(v2f64
(scalar_to_vector (loadf64 addr:$src)))))),
(v2i64 (MaskOpNode (bc_v4f32
(v2f64
(scalar_to_vector (loadf64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
// Explicitly specify the broadcast string, since we take only 2 elements
// from the v4f32x_info source.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
(v2i64 (OpNode (bc_v4f32
(v2f64
(scalar_to_vector (loadf64 addr:$src)))))),
(v2i64 (MaskOpNode (bc_v4f32
(v2f64
(scalar_to_vector (loadf64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Float
// Also Convert Signed/Unsigned Doubleword to Half
multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
X86SchedWriteWidths sched, Predicate prd = HasDQI> {
let Predicates = [prd] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parcer. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
null_frag, sched.XMM, _src.info128.BroadcastStr,
"{x}", i128mem, _src.info128.KRCWM>,
EVEX_V128, NotEVEX2VEXConvertible;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
"{y}">, EVEX_V256,
NotEVEX2VEXConvertible;
// Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
(!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
_src.info128.KRCWM:$mask),
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
_src.info128.KRCWM:$mask),
(!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
(!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
_src.info128.KRCWM:$mask),
(!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
_src.info128.KRCWM:$mask),
(!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
(!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
(_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
_dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
(!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
"$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
}
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
X86any_VSintToFP, X86VSintToFP,
SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPD2DQ>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPD2DQ>,
PS, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
uint_to_fp, X86any_VUintToFP, X86VUintToFP,
SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
uint_to_fp, X86VUintToFpRnd,
SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PS, EVEX_CD8<64, CD8VF>;
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
sint_to_fp, X86VSintToFpRnd,
SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
VEX_W, XS, EVEX_CD8<64, CD8VF>;
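// The dq2ph and qq2ps conversions below share avx512_cvtqq2ps_dq2ph: the
// dq2ph variants pass HasFP16 as the predicate, while the qq2ps variants use
// the default HasDQI.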
defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
X86any_VSintToFP, X86VMSintToFP,
X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
SchedWriteCvtDQ2PS, HasFP16>,
T_MAP5PS, EVEX_CD8<32, CD8VF>;
defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
X86any_VUintToFP, X86VMUintToFP,
X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
EVEX_CD8<32, CD8VF>;
defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
X86any_VSintToFP, X86VMSintToFP,
X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
SchedWriteCvtDQ2PS>, VEX_W, PS,
EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
X86any_VUintToFP, X86VMUintToFP,
X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
SchedWriteCvtDQ2PS>, VEX_W, XD,
EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
// Special patterns to allow use of X86mcvtp2Int for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
(VCVTPD2DQZ128rr VR128X:$src)>;
def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
(VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
(VCVTPD2DQZ128rm addr:$src)>;
def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
(VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2DQZ128rmb addr:$src)>;
def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
(VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
v4i32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
// Special patterns to allow use of X86mcvttp2si for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
(VCVTTPD2DQZ128rr VR128X:$src)>;
def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
(VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
(VCVTTPD2DQZ128rm addr:$src)>;
def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
(VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTTPD2DQZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
(VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
v4i32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
// Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
(VCVTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
(VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
(VCVTPD2UDQZ128rm addr:$src)>;
def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
(VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2UDQZ128rmb addr:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
(VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
v4i32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
// Special patterns to allow use of X86mcvttp2ui for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
(VCVTTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
(VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
(VCVTTPD2UDQZ128rm addr:$src)>;
def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
(VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTTPD2UDQZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
(VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
v4i32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
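// Fold a 64-bit zero-extending load (X86vzload64) into the 128-bit memory
// forms below, since only the low two source elements are consumed.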
let Predicates = [HasDQI, HasVLX] in {
def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTPS2QQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
(X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
(X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
v2i64x_info.ImmAllZerosV)),
(VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTPS2UQQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
(X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
(X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
v2i64x_info.ImmAllZerosV)),
(VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2QQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
(X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
(X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
v2i64x_info.ImmAllZerosV)),
(VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2UQQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
(X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
(X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
v2i64x_info.ImmAllZerosV)),
(VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDZ128rm addr:$src)>;
def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
(X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
(X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
v2f64x_info.ImmAllZerosV)),
(VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTUDQ2PDZ128rm addr:$src)>;
def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
(X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
(X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
v2f64x_info.ImmAllZerosV)),
(VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, dag ld_dag,
X86FoldableSchedWrite sched> {
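// AVX512_maskable_split takes separate pattern operators for the unmasked and
// masked forms: X86any_cvtph2ps for the unmasked patterns and X86cvtph2ps for
// the masked ones.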
defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
(ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
(X86any_cvtph2ps (_src.VT _src.RC:$src)),
(X86cvtph2ps (_src.VT _src.RC:$src))>,
T8PD, Sched<[sched]>;
defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
(X86any_cvtph2ps (_src.VT ld_dag)),
(X86cvtph2ps (_src.VT ld_dag))>,
T8PD, Sched<[sched.Folded]>;
}
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86FoldableSchedWrite sched> {
let Uses = [MXCSR] in
defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
(ins _src.RC:$src), "vcvtph2ps",
"{sae}, $src", "$src, {sae}",
(X86cvtph2psSAE (_src.VT _src.RC:$src))>,
T8PD, EVEX_B, Sched<[sched]>;
}
let Predicates = [HasAVX512] in
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
(load addr:$src), WriteCvtPH2PSZ>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
(load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
(bitconvert (v2i64 (X86vzload64 addr:$src))),
WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
(VCVTPH2PSZ128rm addr:$src)>;
}
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _dest.RC:$dst,
(X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
Sched<[RR]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _dest.RC:$dst,
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
_dest.RC:$src0, _src.KRCWM:$mask))]>,
Sched<[RR]>, EVEX_K;
def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
[(set _dest.RC:$dst,
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
_dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
Sched<[RR]>, EVEX_KZ;
let hasSideEffects = 0, mayStore = 1 in {
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[MR]>;
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
EVEX_K, Sched<[MR]>, NotMemoryFoldable;
}
}
}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
SchedWrite Sched> {
let hasSideEffects = 0, Uses = [MXCSR] in {
def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
[(set _dest.RC:$dst,
(X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
EVEX_B, Sched<[Sched]>;
let Constraints = "$src0 = $dst" in
def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
[(set _dest.RC:$dst,
(X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
_dest.RC:$src0, _src.KRCWM:$mask))]>,
EVEX_B, Sched<[Sched]>, EVEX_K;
def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
[(set _dest.RC:$dst,
(X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
_dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
EVEX_B, Sched<[Sched]>, EVEX_KZ;
}
}
let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
(VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}
let Predicates = [HasVLX] in {
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
WriteCvtPS2PH, WriteCvtPS2PHSt>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
def : Pat<(store (f64 (extractelt
(bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
def : Pat<(store (i64 (extractelt
(bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
(VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
}
// Unordered/ordered scalar FP compare with SAE, setting EFLAGS.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, Domain d,
X86FoldableSchedWrite sched = WriteFComX> {
let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
"ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
"ucomisd", SSEPackedDouble>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
"comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
"comisd", SSEPackedDouble>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
let isCodeGenOnly = 1 in {
defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
let Defs = [EFLAGS], Predicates = [HasFP16] in {
defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
EVEX_CD8<16, CD8VT1>;
defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
EVEX_CD8<16, CD8VT1>;
defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
"ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
VEX_LIG, EVEX_CD8<16, CD8VT1>;
defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
"comish", SSEPackedSingle>, T_MAP5PS, EVEX,
VEX_LIG, EVEX_CD8<16, CD8VT1>;
let isCodeGenOnly = 1 in {
defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
sse_load_f16, "ucomish", SSEPackedSingle>,
T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
sse_load_f16, "comish", SSEPackedSingle>,
T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}
}
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
EVEX_4V, VEX_LIG, Sched<[sched]>;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
T_MAP6PD;
defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
EVEX_CD8<16, CD8VT1>, T_MAP6PD;
let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
f32x_info>, EVEX_CD8<32, CD8VT1>,
T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
EVEX_CD8<64, CD8VT1>, T8PD;
}
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
Sched<[sched]>;
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(OpNode (_.VT
(_.BroadcastLdFrag addr:$src)))>,
EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
let Uses = [MXCSR] in {
defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
let Predicates = [HasFP16] in
defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX], Uses = [MXCSR] in {
defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
OpNode, sched.XMM, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
OpNode, sched.YMM, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
OpNode, sched.XMM, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
OpNode, sched.YMM, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
OpNode, sched.XMM, v8f16x_info>,
EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
OpNode, sched.YMM, v16f16x_info>,
EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
}
}
defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
Sched<[sched]>, SIMD_EXC;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
EVEX_B, Sched<[sched]>;
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
}
multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
let Predicates = [HasFP16] in
defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
}
let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
SchedWriteFRcp.Scl>;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
SchedWriteFRsqrt.Scl>;
}
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
SchedWriteFRnd.Scl>,
avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
SchedWriteFRnd.Scl>;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode, X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src))>,
Sched<[sched]>;
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(OpNode (_.VT
(_.BroadcastLdFrag addr:$src)))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode, X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
(OpNode (_.VT _.RC:$src))>,
EVEX_B, Sched<[sched]>;
}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched> {
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
sched.XMM>,
EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
sched.YMM>,
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
sched.XMM>,
EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
sched.YMM>,
EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in
defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
}
}
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
SchedWriteFRsqrt>, EVEX;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
SchedWriteFRcp>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
SchedWriteFRnd>,
avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
SchedWriteFRnd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
SchedWriteFRnd>, EVEX;
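// Packed square root with an explicit embedded rounding-mode operand
// (register-to-register form only).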
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
(_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (any_fsqrt _.RC:$src)),
(_.VT (fsqrt _.RC:$src))>, EVEX,
Sched<[sched]>;
defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(any_fsqrt (_.VT (_.LdFrag addr:$src))),
(fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
(fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
let Predicates = [HasFP16] in
defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
sched.PH.ZMM, v32f16_info>,
EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
sched.PH.XMM, v8f16x_info>,
EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
sched.PH.YMM, v16f16x_info>,
EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
}
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.ZMM, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.ZMM, v8f64_info>,
EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.XMM, v4f32x_info>,
EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.YMM, v8f32x_info>,
EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.XMM, v2f64x_info>,
EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.YMM, v4f64x_info>,
EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
}
let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
let Predicates = [HasFP16] in
defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
sched.PH.ZMM, v32f16_info>,
EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.ZMM, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.ZMM, v8f64_info>,
EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
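// Scalar square root: masked intrinsic forms (register, memory, and embedded
// rounding) plus codegen-only FR register forms used for plain scalar fsqrt.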
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
let ExeDomain = _.ExeDomain, Predicates = [prd] in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(X86fsqrts (_.VT _.RC:$src1),
(_.VT _.RC:$src2))>,
Sched<[sched]>, SIMD_EXC;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(X86fsqrts (_.VT _.RC:$src1),
(_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(X86fsqrtRnds (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 timm:$rc))>,
EVEX_B, EVEX_RC, Sched<[sched]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
let Predicates = [prd] in {
def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
(!cast<Instruction>(Name#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
}
let Predicates = [prd, OptForSize] in {
def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
(!cast<Instruction>(Name#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>;
}
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
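// vrndscale* scalar rounding: rounds the low element to a scaled integral
// value selected by the immediate, with an optional {sae} form.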
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3)))>,
Sched<[sched]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3)))>, EVEX_B,
Sched<[sched]>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
(_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
let Predicates = [HasAVX512] in {
def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
(_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src1, timm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
(_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
addr:$src1, timm:$src2))>;
}
}
let Predicates = [HasFP16] in
defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
SchedWriteFRnd.Scl, f16x_info>,
AVX512PSIi8Base, TA, EVEX_4V,
EVEX_CD8<16, CD8VT1>;
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
SchedWriteFRnd.Scl, f32x_info>,
AVX512AIi8Base, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
SchedWriteFRnd.Scl, f64x_info>,
VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
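// Helper patterns that match a masked scalar unary op (a select between the
// op result and a passthru/zero value merged into the low element) and map it
// onto the masked intrinsic instruction forms.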
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
dag OutMask, Predicate BasePredicate> {
let Predicates = [BasePredicate] in {
def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
(extractelt _.VT:$dst, (iPTR 0))))),
(!cast<Instruction>("V"#OpcPrefix#r_Intk)
_.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
ZeroFP))),
(!cast<Instruction>("V"#OpcPrefix#r_Intkz)
OutMask, _.VT:$src2, _.VT:$src1)>;
}
}
defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
(v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
(v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
(v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// any of them to the multiclasses below.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
(vselect_mask node:$mask,
(trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
(vselect_mask node:$mask,
(X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
(vselect_mask node:$mask,
(X86vtruncus node:$src), node:$src0)>;
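// avx512_trunc_common instantiates the register, masked-register, and
// memory-destination (store) forms of a vpmov* truncate instruction.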
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
let ExeDomain = DestInfo.ExeDomain in {
def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[(set DestInfo.RC:$dst,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
EVEX, Sched<[sched]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
(ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[(set DestInfo.RC:$dst,
(MaskNode (SrcInfo.VT SrcInfo.RC:$src),
(DestInfo.VT DestInfo.RC:$src0),
SrcInfo.KRCWM:$mask))]>,
EVEX, EVEX_K, Sched<[sched]>;
def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
(ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
[(set DestInfo.RC:$dst,
(DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
EVEX, EVEX_KZ, Sched<[sched]>;
}
let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
EVEX, Sched<[sched.Folded]>;
def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } // mayStore = 1, hasSideEffects = 0
}
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
PatFrag truncFrag, PatFrag mtruncFrag,
string Name> {
def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
(!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
addr:$dst, SrcInfo.RC:$src)>;
def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
SrcInfo.KRCWM:$mask),
(!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
SDNode OpNode256, SDNode OpNode512,
SDPatternOperator MaskNode128,
SDPatternOperator MaskNode256,
SDPatternOperator MaskNode512,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTSrcInfo,
X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
X86MemOperand x86memopZ, PatFrag truncFrag,
PatFrag mtruncFrag, Predicate prd = HasAVX512>{
let Predicates = [HasVLX, prd] in {
defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
mtruncFrag, NAME>, EVEX_V128;
defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
mtruncFrag, NAME>, EVEX_V256;
}
let Predicates = [prd] in
defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
VTSrcInfo.info512, DestInfoZ, x86memopZ>,
avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
mtruncFrag, NAME>, EVEX_V512;
}
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
avx512vl_i64_info, v16i8x_info, v16i8x_info,
v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86SchedWriteWidths sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i64_info, v8i16x_info, v8i16x_info,
v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86SchedWriteWidths sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i64_info, v4i32x_info, v4i32x_info,
v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86SchedWriteWidths sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i32_info, v16i8x_info, v16i8x_info,
v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86SchedWriteWidths sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i32_info, v8i16x_info, v8i16x_info,
v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86SchedWriteWidths sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i16_info, v16i8x_info, v16i8x_info,
v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
SchedWriteVecTruncate, truncstorevi8,
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
SchedWriteVecTruncate, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
SchedWriteVecTruncate, truncstore_us_vi8,
masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
SchedWriteVecTruncate, truncstorevi16,
masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
SchedWriteVecTruncate, truncstore_s_vi16,
masked_truncstore_s_vi16, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
select_truncus, SchedWriteVecTruncate,
truncstore_us_vi16, masked_truncstore_us_vi16,
X86vtruncus, X86vmtruncus>;
defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
SchedWriteVecTruncate, truncstorevi32,
masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
SchedWriteVecTruncate, truncstore_s_vi32,
masked_truncstore_s_vi32, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
select_truncus, SchedWriteVecTruncate,
truncstore_us_vi32, masked_truncstore_us_vi32,
X86vtruncus, X86vmtruncus>;
defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
SchedWriteVecTruncate, truncstorevi8,
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
SchedWriteVecTruncate, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
select_truncus, SchedWriteVecTruncate,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;
defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
SchedWriteVecTruncate, truncstorevi16,
masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
SchedWriteVecTruncate, truncstore_s_vi16,
masked_truncstore_s_vi16, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
select_truncus, SchedWriteVecTruncate,
truncstore_us_vi16, masked_truncstore_us_vi16,
X86vtruncus, X86vmtruncus>;
defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
SchedWriteVecTruncate, truncstorevi8,
masked_truncstorevi8, X86vtrunc,
X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
SchedWriteVecTruncate, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
select_truncus, SchedWriteVecTruncate,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
(v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
(v4i32 (EXTRACT_SUBREG
(v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm)))), sub_xmm))>;
}
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
(v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm))), sub_xmm))>;
}
// Without BWI we can't use vXi16/vXi8 vselect, so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
DestInfo.RC:$src0,
SrcInfo.KRCWM:$mask)),
(!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
SrcInfo.KRCWM:$mask,
SrcInfo.RC:$src)>;
def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
DestInfo.ImmAllZerosV,
SrcInfo.KRCWM:$mask)),
(!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
SrcInfo.RC:$src)>;
}
let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}
let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
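// vpmovsx/vpmovzx sign/zero extension: register and memory-source forms with
// optional masking, instantiated per source/destination width below.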
multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
let ExeDomain = DestInfo.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
EVEX, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins x86memop:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (LdFrag addr:$src))>,
EVEX, Sched<[sched.Folded]>;
}
}
multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasBWI] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasBWI] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
v32i8x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
}
}
multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
}
}
multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
SDNode InVecNode, string ExtTy,
X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
}
}
multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
v16i16x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
}
}
multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v8i16x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
}
}
multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v4i32x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v4i32x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v8i32x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
}
}
defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
// Patterns for which we also need any-extend versions. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
// 256-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
}
// 512-bit patterns
let Predicates = [HasBWI] in {
def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
(!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
(!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
(!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
}
}
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
SDNode InVecOp> :
AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
}
// 512-bit patterns
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
}
}
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively, making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
(VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
(VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
// FIXME: Improve scheduling of gather/scatter instructions.
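// Gathers load elements from index-scaled memory locations under a mask; the
// mask register is also a write-back operand that is cleared as elements are
// loaded.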
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
(ins _.RC:$src1, MaskRC:$mask, memop:$src2),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
vy512xmem>, EVEX_V512, VEX_W;
defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
vx256xmem>, EVEX_V256, VEX_W;
defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
vy256xmem>, EVEX_V256, VEX_W;
defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
vx128xmem>, EVEX_V128, VEX_W;
defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
vx128xmem>, EVEX_V128, VEX_W;
}
}
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
EVEX_V512;
defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
vy256xmem>, EVEX_V256;
defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
vy128xmem>, EVEX_V256;
defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
vx128xmem>, EVEX_V128;
defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
vx64xmem, VK2WM>, EVEX_V128;
}
}
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
hasSideEffects = 0 in
def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
(ins memop:$dst, MaskRC:$mask, _.RC:$src),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[]>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[WriteStore]>;
}
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
vy512xmem>, EVEX_V512, VEX_W;
defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
vx256xmem>, EVEX_V256, VEX_W;
defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
vy256xmem>, EVEX_V256, VEX_W;
defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
vx128xmem>, EVEX_V128, VEX_W;
defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
vx128xmem>, EVEX_V128, VEX_W;
}
}
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
EVEX_V512;
defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
vy256xmem>, EVEX_V256;
defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
vy128xmem>, EVEX_V256;
defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
vx128xmem>, EVEX_V128;
defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
vx64xmem, VK2WM>, EVEX_V128;
}
}
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Gather/scatter prefetch instructions.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
RegisterClass KRC, X86MemOperand memop> {
let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
!strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
EVEX, EVEX_K, Sched<[WriteLoad]>;
}
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
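// vpmovm2* sign-extends each mask bit into a vector element of all ones or
// all zeros.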
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
EVEX, Sched<[Sched]>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
}
}
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;
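// The reverse direction, vpmov*2m, collects the sign bit of each vector
// element into a mask register; the patterns below express this as 0 > x
// (X86pcmpgtm against an all-zeros vector).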
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
EVEX, Sched<[WriteMove]>;
}
// Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _,
string Name> {
def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(Name#"Zrr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx)),
_.KRC))>;
}
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
EVEX_V256;
defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
EVEX_V128;
}
let Predicates = [prd, NoVLX] in {
defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
}
}
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
avx512vl_i64_info, HasDQI>, VEX_W;
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target-independent DAG combine likes to combine sext and trunc.
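// The mask is first sign-extended to v16i32 with vpmovm2d (DQI) and the
// result is then truncated to bytes/words with vpmovdb/vpmovdw.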
let Predicates = [HasDQI, NoBWI] in {
def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
(VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
(VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
let Predicates = [HasDQI, NoBWI, HasVLX] in {
def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
(VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//
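// vcompress* packs the elements selected by the mask contiguously into the
// low part of the destination (or to memory); vexpand* is the inverse and
// scatters contiguous source elements to the positions selected by the mask.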
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(null_frag)>, AVX5128IBase,
Sched<[sched]>;
let mayStore = 1, hasSideEffects = 0 in
def mr : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded]>;
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded]>;
}
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#rrk)
_.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#rrkz)
_.KRCWM:$mask, _.RC:$src)>;
}
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTInfo,
Predicate Pred = HasAVX512> {
let Predicates = [Pred] in
defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [Pred, HasVLX] in {
defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
}
}
// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(null_frag)>, AVX5128IBase,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
(null_frag)>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
(!cast<Instruction>(Name#_.ZSuffix#rmkz)
_.KRCWM:$mask, addr:$src)>;
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmkz)
_.KRCWM:$mask, addr:$src)>;
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
(_.VT _.RC:$src0))),
(!cast<Instruction>(Name#_.ZSuffix#rmk)
_.RC:$src0, _.KRCWM:$mask, addr:$src)>;
def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#rrk)
_.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#rrkz)
_.KRCWM:$mask, _.RC:$src)>;
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTInfo,
Predicate Pred = HasAVX512> {
let Predicates = [Pred] in
defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [Pred, HasVLX] in {
defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
}
}
// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
avx512vl_f64_info>, EVEX, VEX_W;
//handle instruction reg_vec1 = op(reg_vec,imm)
// op(mem_vec,imm)
// op(broadcast(eltVt),imm)
//all instructions are created with FROUND_CURRENT
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
SDPatternOperator MaskOpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
(MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
Sched<[sched]>;
defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 timm:$src2)),
(MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 timm:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
"${src1}"#_.BroadcastStr#", $src2",
(OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2)),
(MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
//handle instruction reg_vec1 = op(reg_vec2,imm),{sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 timm:$src2))>,
EVEX_B, Sched<[sched]>;
}
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.ZMM, _.info512>,
avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
sched.ZMM, _.info512>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.XMM, _.info128>, EVEX_V128;
defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.YMM, _.info256>, EVEX_V256;
}
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
//all instructions are created with FROUND_CURRENT
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 timm:$src3))>,
Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 timm:$src3))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr#", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i32 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo>{
let ExeDomain = DestInfo.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT SrcInfo.RC:$src2),
(i8 timm:$src3)))>,
Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _>:
avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr#", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_scalar,imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 timm:$src3))>,
Sched<[sched]>;
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.ScalarIntMemFrags addr:$src2),
(i32 timm:$src3))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 timm:$src3))>,
EVEX_B, Sched<[sched]>;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 timm:$src3))>,
EVEX_B, Sched<[sched]>;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
EVEX_V128;
defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
EVEX_V256;
}
}
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
let Predicates = [Pred] in {
defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
}
let Predicates = [Pred, HasVLX] in {
defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
}
}
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
Predicate Pred = HasAVX512> {
let Predicates = [Pred] in {
defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
EVEX_V512;
}
let Predicates = [Pred, HasVLX] in {
defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
EVEX_V128;
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
EVEX_V256;
}
}
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
let Predicates = [prd] in {
defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
}
}
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
X86SchedWriteWidths sched, Predicate prd>{
defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
X86VReduce, X86VReduce, X86VReduceSAE,
SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, X86VGetMant, X86VGetMantSAE,
SchedWriteFRnd, HasAVX512>;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, X86VRangeSAE,
SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
0x50, X86VRange, X86VRangeSAE,
SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _,
X86VectorVTInfo CastInfo,
string EVEX2VEXOvrd> {
let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
(i8 timm:$src3)))))>,
Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT
(bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1,
(CastInfo.LdFrag addr:$src2),
(i8 timm:$src3)))))>,
Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr#", $src3",
(_.VT
(bitconvert
(CastInfo.VT
(X86Shuf128 _.RC:$src1,
(_.BroadcastLdFrag addr:$src2),
(i8 timm:$src3)))))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo _,
AVX512VLVectorVTInfo CastInfo, bits<8> opc,
string EVEX2VEXOvrd>{
let Predicates = [HasAVX512] in
defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
_.info512, CastInfo.info512, "">, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
_.info256, CastInfo.info256,
EVEX2VEXOvrd>, EVEX_V256;
}
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
// NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
// instantiation of this class.
let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)),
(i8 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<"VPALIGNRrmi">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr#", $src3",
(X86VAlign _.RC:$src1,
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
AVX512AIi8Base, EVEX_4V, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
AVX512AIi8Base, EVEX_4V, EVEX_V128;
// We can't really override the 256-bit version so change it back to unset.
let EVEX2VEXOverride = ? in
defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
AVX512AIi8Base, EVEX_4V, EVEX_V256;
}
}
defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
VEX_W;
defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
SchedWriteShuffle, avx512vl_i8_info,
avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
// Fragments to help convert valignq into masked valignd, or valignq/valignd
// into vpalignr.
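// The alignment immediate counts elements of the instruction's element type,
// so it has to be rescaled: valignq -> valignd doubles it (8-byte vs. 4-byte
// elements), valignq -> vpalignr multiplies it by 8 and valignd -> vpalignr
// by 4 (byte granularity).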
def ValignqImm32XForm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
X86VectorVTInfo From, X86VectorVTInfo To,
SDNodeXForm ImmXForm> {
def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1, From.RC:$src2,
timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
(ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1, From.RC:$src2,
timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
(ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(From.LdFrag addr:$src2),
timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(From.LdFrag addr:$src2),
timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
}
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
X86VectorVTInfo From,
X86VectorVTInfo To,
SDNodeXForm ImmXForm> :
avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
def : Pat<(From.VT (OpNode From.RC:$src1,
(bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
timm:$src3)),
(!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(bitconvert
(To.VT (To.BroadcastLdFrag addr:$src2))),
timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(bitconvert
(To.VT (To.BroadcastLdFrag addr:$src2))),
timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
}
let Predicates = [HasAVX512] in {
// For 512-bit we lower to the widest element type we can. So we only need
// to handle converting valignq to valignd.
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
v16i32_info, ValignqImm32XForm>;
}
let Predicates = [HasVLX] in {
// For 128-bit we lower to the widest element type we can. So we only need
// to handle converting valignq to valignd.
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
v4i32x_info, ValignqImm32XForm>;
// For 256-bit we lower to the widest element type we can. So we only need
// to handle converting valignq to valignd.
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
v8i32x_info, ValignqImm32XForm>;
}
let Predicates = [HasVLX, HasBWI] in {
// We can turn 128- and 256-bit VALIGND/VALIGNQ into VPALIGNR.
defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
v16i8x_info, ValignqImm8XForm>;
defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
v16i8x_info, ValigndImm8XForm>;
}
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded]>;
}
}
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> :
avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1), OpcodeStr,
"${src1}"#_.BroadcastStr,
"${src1}"#_.BroadcastStr,
(_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded]>;
}
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
EVEX_V128;
}
}
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
EVEX_V128;
}
}
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched,
Predicate prd> {
defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
avx512vl_i64_info, prd>, VEX_W;
defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
avx512vl_i32_info, prd>;
}
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched,
Predicate prd> {
defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
avx512vl_i16_info, prd>, VEX_WIG;
defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
avx512vl_i8_info, prd>, VEX_WIG;
}
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
bits<8> opc_d, bits<8> opc_q,
string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
HasAVX512>,
avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
HasBWI>;
}
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
SchedWriteVecALU>;
// VPABS: Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v4i64 (abs VR256X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
sub_ymm)>;
def : Pat<(v2i64 (abs VR128X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
sub_xmm)>;
}
// Use the 512-bit version to implement the 128/256-bit forms.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd, NoVLX] in {
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
(EXTRACT_SUBREG
(!cast<Instruction>(InstrStr # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
_.info256.RC:$src1,
_.info256.SubRegIdx)),
_.info256.SubRegIdx)>;
def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
(EXTRACT_SUBREG
(!cast<Instruction>(InstrStr # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
_.info128.RC:$src1,
_.info128.SubRegIdx)),
_.info128.SubRegIdx)>;
}
}
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
SchedWriteVecIMul, HasCDI>;
// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
SchedWriteVecALU, HasCDI>;
// VPLZCNT: Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//
// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
SchedWriteVecALU, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
avx512vl_f32_info, HasAVX512>, XS;
}
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
(_.VT (_.BroadcastLdFrag addr:$src))>,
EVEX, EVEX_CD8<_.EltSize, CD8VH>,
Sched<[sched.Folded]>;
}
}
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
VTInfo.info128>, EVEX_V128;
}
}
multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
avx512vl_f64_info>, XD, VEX_W;
}
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
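// The patterns below fold a broadcast of a scalar f64 held in an FR64X
// register into VMOVDDUPZ128rr by first copying the scalar into an XMM
// register; merge-masked and zero-masked forms are handled as well.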
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
immAllZerosV),
(VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
SchedWriteFShuffleSizes>;
}
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
SchedWriteShuffle, HasBWI>;
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
SchedWriteShuffle, HasAVX512>;
//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
def mr : AVX512Ii8<opc, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
EVEX, TAPD, Sched<[WriteVecExtract]>;
defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
}
}
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
EVEX, PD, Sched<[WriteVecExtract]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX, TAPD, FoldGenData<NAME#rr>,
Sched<[WriteVecExtract]>;
defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
}
}
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
RegisterClass GRC> {
let Predicates = [HasDQI] in {
def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GRC:$dst,
(extractelt (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, TAPD, Sched<[WriteVecExtract]>;
def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (_.VT _.RC:$src1),
imm:$src2),addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
Sched<[WriteVecExtractSt]>;
}
}
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, PatFrag LdFrag,
SDPatternOperator immoperator> {
def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, PatFrag LdFrag> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
}
}
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, RegisterClass GRC> {
let Predicates = [HasDQI] in {
def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, GRC:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
_.ScalarLdFrag, imm>, TAPD;
}
}
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
let Predicates = [HasAVX512, NoBWI] in {
def : Pat<(X86pinsrb VR128:$src1,
(i32 (anyext (i8 (bitconvert v8i1:$src2)))),
timm:$src3),
(VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
timm:$src3)>;
}
let Predicates = [HasBWI] in {
def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
(VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src2, sub_8bit), timm:$src3)>;
def : Pat<(X86pinsrb VR128:$src1,
(i32 (anyext (i8 (bitconvert v8i1:$src2)))),
timm:$src3),
(VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
timm:$src3)>;
}
// Always select FP16 instructions if available.
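// The negative AddedComplexity lowers the priority of these VPINSRW/VPEXTRW
// based fallbacks so that the dedicated AVX512-FP16 load/store/move patterns
// are preferred whenever FP16 is available.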
let Predicates = [HasBWI], AddedComplexity = -10 in {
def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
}
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
SchedWriteFShuffle>,
EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
AVX512AIi8Base, EVEX_4V;
}
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
def ri : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
Sched<[sched]>;
def mi : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 timm:$src2))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr,
X86SchedWriteWidths sched, Predicate prd>{
let Predicates = [prd] in
defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
sched.ZMM, v64i8_info>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
sched.YMM, v32i8x_info>, EVEX_V256;
defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
sched.XMM, v16i8x_info>, EVEX_V128;
}
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
SchedWriteShuffle, HasBWI>,
AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
SchedWriteShuffle, HasBWI>,
AVX512PDIi8Base, EVEX_4V, VEX_WIG;
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo _dst, X86VectorVTInfo _src> {
let isCommutable = 1 in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _dst.RC:$dst,(_dst.VT
(OpNode (_src.VT _src.RC:$src1),
(_src.VT _src.RC:$src2))))]>,
Sched<[sched]>;
def rm : AVX512BI<opc, MRMSrcMem,
(outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _dst.RC:$dst,(_dst.VT
(OpNode (_src.VT _src.RC:$src1),
(_src.VT (bitconvert
(_src.LdFrag addr:$src2))))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
string OpcodeStr, X86SchedWriteWidths sched,
Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
v8i64_info, v64i8_info>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
v4i64x_info, v32i8x_info>, EVEX_V256;
defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
v2i64x_info, v16i8x_info>, EVEX_V128;
}
}
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
// Transforms to swizzle an immediate to enable better matching when the
// memory operand isn't in the right place.
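// The 8-bit immediate is a truth table indexed by the three source bits
// (src1 supplies bit 2 of the index, src2 bit 1 and src3 bit 0). Reordering
// the operands therefore just permutes the index bits, which is what the
// XForms below compute. For example, 0xCA (src1 ? src2 : src3) becomes 0xD8
// once the first and third operands are swapped by VPTERNLOG321_imm8.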
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 1/4 and 3/6.
uint8_t NewImm = Imm & 0xa5;
if (Imm & 0x02) NewImm |= 0x10;
if (Imm & 0x10) NewImm |= 0x02;
if (Imm & 0x08) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
uint8_t Imm = N->getZExtValue();
// Swap bits 2/4 and 3/5.
uint8_t NewImm = Imm & 0xc3;
if (Imm & 0x04) NewImm |= 0x10;
if (Imm & 0x10) NewImm |= 0x04;
if (Imm & 0x08) NewImm |= 0x20;
if (Imm & 0x20) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 1/2 and 5/6.
uint8_t NewImm = Imm & 0x99;
if (Imm & 0x02) NewImm |= 0x04;
if (Imm & 0x04) NewImm |= 0x02;
if (Imm & 0x20) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x20;
return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by moving operand 1 to the end.
uint8_t Imm = N->getZExtValue();
// Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
uint8_t NewImm = Imm & 0x81;
if (Imm & 0x02) NewImm |= 0x04;
if (Imm & 0x04) NewImm |= 0x10;
if (Imm & 0x08) NewImm |= 0x40;
if (Imm & 0x10) NewImm |= 0x02;
if (Imm & 0x20) NewImm |= 0x08;
if (Imm & 0x40) NewImm |= 0x20;
return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
uint8_t Imm = N->getZExtValue();
// Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
uint8_t NewImm = Imm & 0x81;
if (Imm & 0x02) NewImm |= 0x10;
if (Imm & 0x04) NewImm |= 0x02;
if (Imm & 0x08) NewImm |= 0x20;
if (Imm & 0x10) NewImm |= 0x04;
if (Imm & 0x20) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT _.RC:$src3),
(i8 timm:$src4)), 1, 1>,
AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 timm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
"$src2, ${src3}"#_.BroadcastStr#", $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (_.BroadcastLdFrag addr:$src3)),
(i8 timm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
// Additional patterns for matching zero masking with loads in other
// positions.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
// Additional patterns for matching masked loads with different
// operand orders.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
// Additional patterns for matching zero masking with broadcasts in other
// positions.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1,
(_.BroadcastLdFrag addr:$src3),
_.RC:$src2, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// Additional patterns for matching masked broadcasts with different
// operand orders.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(_.BroadcastLdFrag addr:$src3),
(i8 timm:$src4)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2,
(_.BroadcastLdFrag addr:$src3),
_.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
_.info512, NAME>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
_.info128, NAME>, EVEX_V128;
defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
_.info256, NAME>, EVEX_V256;
}
}
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
avx512vl_i64_info>, VEX_W;
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
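// (Immediate 15 = 0x0f is the truth table whose bit is set exactly when the
// src1 index bit is clear, so the result is the complement of the first
// operand regardless of the other two.)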
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 (vnot VR512:$src)),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
def : Pat<(v32i16 (vnot VR512:$src)),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
def : Pat<(v16i32 (vnot VR512:$src)),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
def : Pat<(v8i64 (vnot VR512:$src)),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v16i8 (vnot VR128X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
def : Pat<(v8i16 (vnot VR128X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
def : Pat<(v4i32 (vnot VR128X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
def : Pat<(v2i64 (vnot VR128X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
def : Pat<(v32i8 (vnot VR256X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
def : Pat<(v16i16 (vnot VR256X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
def : Pat<(v8i32 (vnot VR256X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
def : Pat<(v4i64 (vnot VR256X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v16i8 (vnot VR128X:$src)),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
def : Pat<(v8i16 (vnot VR128X:$src)),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
def : Pat<(v4i32 (vnot VR128X:$src)),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
def : Pat<(v2i64 (vnot VR128X:$src)),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
def : Pat<(v32i8 (vnot VR256X:$src)),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
def : Pat<(v16i16 (vnot VR256X:$src)),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
def : Pat<(v8i32 (vnot VR256X:$src)),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
def : Pat<(v4i64 (vnot VR256X:$src)),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
X86VectorVTInfo TblVT>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3),
(i32 timm:$src4))>, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
(i32 timm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
"$src2, ${src3}"#_.BroadcastStr#", $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
(i32 timm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo TblVT>
: avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(X86VFixupimmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3),
(i32 timm:$src4))>,
EVEX_B, Sched<[sched]>;
}
}
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
X86VectorVTInfo _src3VT> {
let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
(i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(X86VFixupimmSAEs (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
(i32 timm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT (scalar_to_vector
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 timm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _Vec,
AVX512VLVectorVTInfo _Tbl> {
let Predicates = [HasAVX512] in
defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
_Vec.info512, _Tbl.info512>, AVX512AIi8Base,
EVEX_4V, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
_Vec.info128, _Tbl.info128>, AVX512AIi8Base,
EVEX_4V, EVEX_V128;
defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
_Vec.info256, _Tbl.info256>, AVX512AIi8Base,
EVEX_4V, EVEX_V256;
}
}
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
// __m128 foo(__m128 A, __m128 B) {
// A[0] += B[0];
// return A;
// }
//
// Previously we generated:
// addss %xmm0, %xmm1
// movss %xmm1, %xmm0
//
// We now generate:
// addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
// __m128 foo(__m128 A, __m128 B) {
// __m128 C = A + B;
// return (__m128) {C[0], A[1], A[2], A[3]};
// }
//
// Previously we generated:
// addps %xmm0, %xmm1
// movss %xmm1, %xmm0
//
// We now generate:
// addss %xmm1, %xmm0
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
string OpcPrefix, SDNode MoveNode,
X86VectorVTInfo _, PatLeaf ZeroFP> {
let Predicates = [HasAVX512] in {
// extracted scalar math op with insert via movss
def : Pat<(MoveNode
(_.VT VR128X:$dst),
(_.VT (scalar_to_vector
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
_.FRC:$src)))),
(!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
(_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
def : Pat<(MoveNode
(_.VT VR128X:$dst),
(_.VT (scalar_to_vector
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
(_.ScalarLdFrag addr:$src))))),
(!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp (_.EltVT
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src2),
_.FRC:$src0))),
(!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp (_.EltVT
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src2)),
_.FRC:$src0))),
(!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// extracted masked scalar math op with zero-masking and insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp (_.EltVT
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src2), (_.EltVT ZeroFP)))),
(!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
(X86selects_mask VK1WM:$mask,
(MaskedOp (_.EltVT
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
(!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
}
}
defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
SDNode Move, X86VectorVTInfo _> {
let Predicates = [HasAVX512] in {
def : Pat<(_.VT (Move _.VT:$dst,
(scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
(!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
}
}
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
let Predicates = [HasVLX, HasVAES] in {
defm Z128 : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix),
loadv2i64, 0, VR128X, i128mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
defm Z256 : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix#"_256"),
loadv4i64, 0, VR256X, i256mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512, HasVAES] in
defm Z : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix#"_512"),
loadv8i64, 0, VR512, i512mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
EVEX_CD8<64, CD8VF>, VEX_WIG;
}
// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in {
defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
T8PD, EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.LdFrag addr:$src3))))>,
T8PD, EVEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
: VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in
defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
"${src3}"#VTI.BroadcastStr#", $src2",
"$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
T8PD, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
EVEX_V256;
defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
EVEX_V128;
}
}
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
EVEX_V256;
defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
EVEX_V128;
}
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
SDNode OpNode, X86SchedWriteWidths sched> {
defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
SDNode OpNode, X86SchedWriteWidths sched> {
defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
avx512vl_i8_info, HasVBMI2>, EVEX,
NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
bit IsCommutable> {
let ExeDomain = VTI.ExeDomain in {
defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1,
VTI.RC:$src2, VTI.RC:$src3)),
IsCommutable, IsCommutable>,
EVEX_4V, T8PD, Sched<[sched]>;
defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.LdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
"$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
X86SchedWriteWidths sched, bit IsCommutable> {
let Predicates = [HasVNNI] in
defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
IsCommutable>, EVEX_V512;
let Predicates = [HasVNNI, HasVLX] in {
defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
IsCommutable>, EVEX_V256;
defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
IsCommutable>, EVEX_V128;
}
}
// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
// Patterns to match VPDPWSSD from existing instructions/intrinsics.
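// Roughly, vpdpwssd computes acc + pmaddwd(a, b) per 32-bit element, so an
// integer add whose operand is a single-use vpmaddwd can be folded into one
// VNNI instruction. A hedged intrinsic-level sketch of the shape being
// matched (illustrative only):
//
//   #include <immintrin.h>
//   __m512i fold(__m512i acc, __m512i a, __m512i b) {
//     // add(acc, pmaddwd(a, b)) can be selected as vpdpwssd(acc, a, b)
//     return _mm512_add_epi32(acc, _mm512_madd_epi16(a, b));
//   }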
let Predicates = [HasVNNI] in {
def : Pat<(v16i32 (add VR512:$src1,
(X86vpmaddwd_su VR512:$src2, VR512:$src3))),
(VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
def : Pat<(v16i32 (add VR512:$src1,
(X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
(VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
def : Pat<(v8i32 (add VR256X:$src1,
(X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
(VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
def : Pat<(v8i32 (add VR256X:$src1,
(X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
(VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
def : Pat<(v4i32 (add VR128X:$src1,
(X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
(VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
def : Pat<(v4i32 (add VR128X:$src1,
(X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
(VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}
//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
avx512vl_i16_info, HasBITALG>, VEX_W;
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
(X86Vpshufbitqmb node:$src1, node:$src2), [{
return N->hasOneUse();
}]>;
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
(ins VTI.RC:$src1, VTI.RC:$src2),
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
(VTI.VT VTI.RC:$src2)),
(X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
(VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
Sched<[sched]>;
defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
(ins VTI.RC:$src1, VTI.MemOp:$src2),
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
(VTI.VT (VTI.LdFrag addr:$src2))),
(X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
(VTI.VT (VTI.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasBITALG] in
defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
let Predicates = [HasBITALG, HasVLX] in {
defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
}
}
// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
X86SchedWriteWidths sched> {
let Predicates = [HasGFNI, HasAVX512] in
defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
EVEX_V512;
let Predicates = [HasGFNI, HasVLX] in {
defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
EVEX_V256;
defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
EVEX_V128;
}
}
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
SchedWriteVecALU>,
EVEX_CD8<8, CD8VF>, T8PD;
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
X86VectorVTInfo BcstVTI>
: avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
let ExeDomain = VTI.ExeDomain in
defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
"$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
(i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
X86SchedWriteWidths sched> {
let Predicates = [HasGFNI, HasAVX512] in
defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
v64i8_info, v8i64_info>, EVEX_V512;
let Predicates = [HasGFNI, HasVLX] in {
defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
v32i8x_info, v4i64x_info>, EVEX_V256;
defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
v16i8x_info, v2i64x_info>, EVEX_V128;
}
}
defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
X86GF2P8affineinvqb, SchedWriteVecIMul>,
EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
X86GF2P8affineqb, SchedWriteVecIMul>,
EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"v4fmaddps", "$src3, $src2", "$src2, $src3",
[]>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
Sched<[SchedWriteFMA.ZMM.Folded]>;
defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"v4fnmaddps", "$src3, $src2", "$src2, $src3",
[]>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
Sched<[SchedWriteFMA.ZMM.Folded]>;
defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
(outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
"v4fmaddss", "$src3, $src2", "$src2, $src3",
[]>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
Sched<[SchedWriteFMA.Scl.Folded]>;
defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
(outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
"v4fnmaddss", "$src3, $src2", "$src2, $src3",
[]>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
Sched<[SchedWriteFMA.Scl.Folded]>;
}
//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"vp4dpwssd", "$src3, $src2", "$src2, $src3",
[]>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
Sched<[SchedWriteFMA.ZMM.Folded]>;
defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"vp4dpwssds", "$src3, $src2", "$src2, $src3",
[]>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
Sched<[SchedWriteFMA.ZMM.Folded]>;
}
let hasSideEffects = 0 in {
let mayStore = 1, SchedRW = [WriteFStoreX] in
def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
let mayLoad = 1, SchedRW = [WriteFLoadX] in
def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}
//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//
multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
def rr : I<0x68, MRMSrcReg,
(outs _.KRPC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat("vp2intersect", _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT _.RC:$src2)))]>,
EVEX_4V, T8XD, Sched<[sched]>;
def rm : I<0x68, MRMSrcMem,
(outs _.KRPC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat("vp2intersect", _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : I<0x68, MRMSrcMem,
(outs _.KRPC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512, HasVP2INTERSECT] in
defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
}
}
defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _SrcVTInfo,
AVX512VLVectorVTInfo _DstVTInfo,
SDNode OpNode, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
_SrcVTInfo.info512, _DstVTInfo.info512,
_SrcVTInfo.info512, IsCommutable>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
let Predicates = [HasVLX, prd] in {
defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
_SrcVTInfo.info256, _DstVTInfo.info256,
_SrcVTInfo.info256, IsCommutable>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
_SrcVTInfo.info128, _DstVTInfo.info128,
_SrcVTInfo.info128, IsCommutable>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
}
}
let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
avx512vl_f32_info, avx512vl_bf16_info,
X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let ExeDomain = SSEPackedSingle in {
let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasBF16, HasVLX] in {
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
VK4WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
X86cvtneps2bf16, X86cvtneps2bf16,
sched.YMM, "{1to8}", "{y}">, EVEX_V256;
}
} // Predicates = [HasBF16, HasVLX]
} // ExeDomain = SSEPackedSingle
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0>;
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
f128mem:$src), 0, "intel">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0>;
def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
f256mem:$src), 0, "intel">;
}
defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
SchedWriteCvtPD2PS>, T8XS,
EVEX_CD8<32, CD8VF>;
let Predicates = [HasBF16, HasVLX] in {
// Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
(VCVTNEPS2BF16Z128rr VR128X:$src)>;
def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
VK4WM:$mask),
(VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
VK4WM:$mask),
(VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
(VCVTNEPS2BF16Z128rm addr:$src)>;
def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
VK4WM:$mask),
(VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
VK4WM:$mask),
(VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
(X86VBroadcastld32 addr:$src)))),
(VCVTNEPS2BF16Z128rmb addr:$src)>;
def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
(v8bf16 VR128X:$src0), VK4WM:$mask),
(VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
(VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
(VCVTNEPS2BF16Z128rr VR128X:$src)>;
def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
(VCVTNEPS2BF16Z128rm addr:$src)>;
def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
(VCVTNEPS2BF16Z256rr VR256X:$src)>;
def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
(VCVTNEPS2BF16Z256rm addr:$src)>;
+
+ def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZ128rm addr:$src)>;
+ def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZ256rm addr:$src)>;
+
+ def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
+ (VPBROADCASTWZ128rr VR128X:$src)>;
+ def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
+ (VPBROADCASTWZ256rr VR128X:$src)>;
+
+ // TODO: No scalar broadcast, because we don't support a legal scalar bf16 type so far.
+}
+
+let Predicates = [HasBF16] in {
+ def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZrm addr:$src)>;
+
+ def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
+ (VPBROADCASTWZrr VR128X:$src)>;
+ // TODO: No scalar broadcast, because we don't support a legal scalar bf16 type so far.
}
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo src_v> {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins src_v.RC:$src2, src_v.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins src_v.RC:$src2, src_v.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
(src_v.LdFrag addr:$src3)))>, EVEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins src_v.RC:$src2, f32mem:$src3),
OpcodeStr,
!strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr),
(_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
(src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // Constraints = "$src1 = $dst"
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
AVX512VLVectorVTInfo src_v, Predicate prd> {
let Predicates = [prd] in {
defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
src_v.info512>, EVEX_V512;
}
let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
src_v.info256>, EVEX_V256;
defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
src_v.info128>, EVEX_V128;
}
}
let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
avx512vl_f32_info, avx512vl_bf16_info,
HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX512FP16
//===----------------------------------------------------------------------===//
let Predicates = [HasFP16] in {
// Move word (r/m16) to packed word
def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
"vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
"vmovw\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
def : Pat<(f16 (bitconvert GR16:$src)),
(f16 (COPY_TO_REGCLASS
(VMOVW2SHrr
(INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
FR16X))>;
def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
(VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
(VMOVW2SHrr GR32:$src)>;
// FIXME: We should really find a way to improve these patterns.
def : Pat<(v8i32 (X86vzmovl
(insert_subvector undef,
(v4i32 (scalar_to_vector
(and GR32:$src, 0xffff))),
(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
(insert_subvector undef,
(v4i32 (scalar_to_vector
(and GR32:$src, 0xffff))),
(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
(VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
// AVX 128-bit movw instruction writes zeros in the high 128-bit part.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
(VMOVWrm addr:$src)>;
def : Pat<(v16i16 (X86vzload16 addr:$src)),
(SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v32i16 (X86vzload16 addr:$src)),
(SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
(VMOVWrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
(VMOVWrm addr:$src)>;
def : Pat<(v8i32 (X86vzmovl
(insert_subvector undef,
(v4i32 (scalar_to_vector
(i32 (zextloadi16 addr:$src)))),
(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
(insert_subvector undef,
(v4i32 (scalar_to_vector
(i32 (zextloadi16 addr:$src)))),
(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
// Move word from xmm register to r/m16
def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
"vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
(ins i16mem:$dst, VR128X:$src),
"vmovw\t{$src, $dst|$dst, $src}",
[(store (i16 (extractelt (v8i16 VR128X:$src),
(iPTR 0))), addr:$dst)]>,
T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
def : Pat<(i16 (bitconvert FR16X:$src)),
(i16 (EXTRACT_SUBREG
(VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
sub_16bit))>;
def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
(i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
// Allow "vmovw" to use GR64
let hasSideEffects = 0 in {
def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
"vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
}
}
// Convert 16-bit float to i16/u16
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
AVX512VLVectorVTInfo _Dst,
AVX512VLVectorVTInfo _Src,
X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
OpNode, MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasFP16, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert 16-bit float to i16/u16 with truncation
multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
OpNode, MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasFP16, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
}
}
defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, avx512vl_i16_info,
avx512vl_f16_info, SchedWriteCvtPD2DQ>,
T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
X86VUintToFpRnd, avx512vl_f16_info,
avx512vl_i16_info, SchedWriteCvtPD2DQ>,
T_MAP5XD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
avx512vl_i16_info, avx512vl_f16_info,
SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
avx512vl_i16_info, avx512vl_f16_info,
SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, avx512vl_i16_info,
avx512vl_f16_info, SchedWriteCvtPD2DQ>,
T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
X86VSintToFpRnd, avx512vl_f16_info,
avx512vl_i16_info, SchedWriteCvtPD2DQ>,
T_MAP5XS, EVEX_CD8<16, CD8VF>;
// Convert Half to Signed/Unsigned Doubleword
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasFP16, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Half to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasFP16, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
MaskOpNode, sched.YMM>, EVEX_V256;
}
}
defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
EVEX_CD8<16, CD8VH>;
defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPS2DQ>, T_MAP5XS,
EVEX_CD8<16, CD8VH>;
defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPS2DQ>, T_MAP5PS,
EVEX_CD8<16, CD8VH>;
// Convert Half to Signed/Unsigned Quadword
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasFP16, HasVLX] in {
// Explicitly specified broadcast string, since we take only 2 elements
// from v8f16x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
EVEX_V128;
// Explicitly specified broadcast string, since we take only 4 elements
// from v8f16x_info source
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
EVEX_V256;
}
}
// Convert Half to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasFP16, HasVLX] in {
// Explicitly specified broadcast string, since we take only 2 elements
// from v8f16x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
// Explicitly specified broadcast string, since we take only 4 elements
// from v8f16x_info source
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
}
}
defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
EVEX_CD8<16, CD8VQ>;
defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
EVEX_CD8<16, CD8VQ>;
defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPS2DQ>, T_MAP5PD,
EVEX_CD8<16, CD8VQ>;
defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPS2DQ>, T_MAP5PD,
EVEX_CD8<16, CD8VQ>;
// Convert Signed/Unsigned Quadword to Half
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched> {
// We need "x"/"y"/"z" suffixes in order to distinguish between the 128, 256
// and 512-bit memory forms of these instructions in the asm parser. They all
// have the same dest type - 'v8f16x_info'. We also specify the broadcast
// string explicitly for the same reason.
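// For example (illustrative AT&T forms only; the aliases defined below follow
// this scheme):
//   vcvtqq2phx (%rax), %xmm0    // 128-bit (v2i64) memory source
//   vcvtqq2phy (%rax), %xmm0    // 256-bit (v4i64) memory source
//   vcvtqq2phz (%rax), %xmm0    // 512-bit (v8i64) memory source
// Without a suffix the assembler could not tell these memory forms apart,
// since all of them write an xmm (v8f16) destination.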
let Predicates = [HasFP16] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasFP16, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
i128mem, VK2WM>,
EVEX_V128, NotEVEX2VEXConvertible;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
i256mem, VK4WM>,
EVEX_V256, NotEVEX2VEXConvertible;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
"$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
VR512:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
"$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
VK8WM:$mask, VR512:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
VK8WM:$mask, VR512:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
(!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to8}}",
(!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
VK8WM:$mask, i64mem:$src), 0, "att">;
def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to8}}",
(!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
VK8WM:$mask, i64mem:$src), 0, "att">;
}
defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
EVEX_CD8<64, CD8VF>;
// Convert half to signed/unsigned int 32/64
defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
let Predicates = [HasFP16] in {
defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
T_MAP5XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
v8f16x_info, i32mem, loadi32,
"cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f16 (any_sint_to_fp GR32:$src)),
(VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f16 (any_sint_to_fp GR64:$src)),
(VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f16 (any_uint_to_fp GR32:$src)),
(VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f16 (any_uint_to_fp GR64:$src)),
(VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
// Patterns used for matching vcvtsi2sh intrinsic sequences from clang,
// which otherwise produce unnecessary vmovsh instructions.
def : Pat<(v8f16 (X86Movsh
(v8f16 VR128X:$dst),
(v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v8f16 (X86Movsh
(v8f16 VR128X:$dst),
(v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v8f16 (X86Movsh
(v8f16 VR128X:$dst),
(v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v8f16 (X86Movsh
(v8f16 VR128X:$dst),
(v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v8f16 (X86Movsh
(v8f16 VR128X:$dst),
(v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
(VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v8f16 (X86Movsh
(v8f16 VR128X:$dst),
(v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
(VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v8f16 (X86Movsh
(v8f16 VR128X:$dst),
(v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
(VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v8f16 (X86Movsh
(v8f16 VR128X:$dst),
(v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
(VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]
let Predicates = [HasFP16, HasVLX] in {
// Special patterns to allow use of X86VMSintToFP for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
(VCVTQQ2PHZ256rr VR256X:$src)>;
def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
VK4WM:$mask),
(VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
VK4WM:$mask),
(VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
(VCVTQQ2PHZ256rm addr:$src)>;
def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
VK4WM:$mask),
(VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
VK4WM:$mask),
(VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
(VCVTQQ2PHZ256rmb addr:$src)>;
def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
(v8f16 VR128X:$src0), VK4WM:$mask),
(VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
v8f16x_info.ImmAllZerosV, VK4WM:$mask),
(VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
(VCVTQQ2PHZ128rr VR128X:$src)>;
def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
(VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
(VCVTQQ2PHZ128rm addr:$src)>;
def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
(VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
(VCVTQQ2PHZ128rmb addr:$src)>;
def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
(v8f16 VR128X:$src0), VK2WM:$mask),
(VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
v8f16x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
// Special patterns to allow use of X86VMUintToFP for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
(VCVTUQQ2PHZ256rr VR256X:$src)>;
def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
VK4WM:$mask),
(VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
VK4WM:$mask),
(VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
(VCVTUQQ2PHZ256rm addr:$src)>;
def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
VK4WM:$mask),
(VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
VK4WM:$mask),
(VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
(VCVTUQQ2PHZ256rmb addr:$src)>;
def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
(v8f16 VR128X:$src0), VK4WM:$mask),
(VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
v8f16x_info.ImmAllZerosV, VK4WM:$mask),
(VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
(VCVTUQQ2PHZ128rr VR128X:$src)>;
def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
(VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
(VCVTUQQ2PHZ128rm addr:$src)>;
def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
(VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
(VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
(VCVTUQQ2PHZ128rmb addr:$src)>;
def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
(v8f16 VR128X:$src0), VK2WM:$mask),
(VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
v8f16x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
(_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
}
} // Constraints = "@earlyclobber $dst, $src1 = $dst"
multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let Constraints = "@earlyclobber $dst, $src1 = $dst" in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
EVEX_4V, EVEX_B, EVEX_RC;
}
multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
let Predicates = [HasFP16] in {
defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, Sched<[WriteFMAZ]>;
}
let Predicates = [HasVLX, HasFP16] in {
defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
}
}
multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
let Predicates = [HasFP16] in {
defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
"", "@earlyclobber $dst">, EVEX_V512;
}
let Predicates = [HasVLX, HasFP16] in {
defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
}
}
let Uses = [MXCSR] in {
defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
T_MAP6XS, EVEX_CD8<32, CD8VF>;
defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
T_MAP6XD, EVEX_CD8<32, CD8VF>;
defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
}
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
bit IsCommutable> {
let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, VR128X:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3",
(v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
Sched<[WriteFMAX]>;
defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, ssmem:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3",
(v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
"$rc, $src3, $src2", "$src2, $src3, $rc",
(v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
}
}
multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd, bit IsCommutable> {
let Predicates = [HasFP16] in {
defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
IsCommutable, IsCommutable, IsCommutable,
X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src1, ssmem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
0, 0, 0, X86selects, "@earlyclobber $dst">,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
0, 0, 0, X86selects, "@earlyclobber $dst">,
EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
}
}
let Uses = [MXCSR] in {
defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
index 73c341891ab7..33adf15fccaf 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -1,761 +1,764 @@
//===-- llvm-mca.cpp - Machine Code Analyzer -------------------*- C++ -* -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This utility is a simple driver that allows static performance analysis on
// machine code similarly to how IACA (Intel Architecture Code Analyzer) works.
//
// llvm-mca [options] <file-name>
// -march <type>
// -mcpu <cpu>
// -o <file>
//
// The target defaults to the host target.
// The cpu defaults to the 'native' host cpu.
// The output defaults to standard output.
//
//===----------------------------------------------------------------------===//
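//
// Editorial, illustrative invocation (not part of the upstream header; every
// flag used here is one of the cl::opt options declared further down in this
// file, and the cpu/triple values are only examples):
//
//   llvm-mca -mtriple=x86_64-unknown-freebsd -mcpu=skylake \
//            -timeline -iterations=300 input.s -o report.txt
//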
#include "CodeRegion.h"
#include "CodeRegionGenerator.h"
#include "PipelinePrinter.h"
#include "Views/BottleneckAnalysis.h"
#include "Views/DispatchStatistics.h"
#include "Views/InstructionInfoView.h"
#include "Views/RegisterFileStatistics.h"
#include "Views/ResourcePressureView.h"
#include "Views/RetireControlUnitStatistics.h"
#include "Views/SchedulerStatistics.h"
#include "Views/SummaryView.h"
#include "Views/TimelineView.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/MCA/CodeEmitter.h"
#include "llvm/MCA/Context.h"
#include "llvm/MCA/CustomBehaviour.h"
#include "llvm/MCA/InstrBuilder.h"
#include "llvm/MCA/Pipeline.h"
#include "llvm/MCA/Stages/EntryStage.h"
#include "llvm/MCA/Stages/InstructionTables.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
using namespace llvm;
static mc::RegisterMCTargetOptionsFlags MOF;
static cl::OptionCategory ToolOptions("Tool Options");
static cl::OptionCategory ViewOptions("View Options");
static cl::opt<std::string> InputFilename(cl::Positional,
cl::desc("<input file>"),
cl::cat(ToolOptions), cl::init("-"));
static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
cl::init("-"), cl::cat(ToolOptions),
cl::value_desc("filename"));
static cl::opt<std::string>
ArchName("march",
cl::desc("Target architecture. "
"See -version for available targets"),
cl::cat(ToolOptions));
static cl::opt<std::string>
TripleName("mtriple",
cl::desc("Target triple. See -version for available targets"),
cl::cat(ToolOptions));
static cl::opt<std::string>
MCPU("mcpu",
cl::desc("Target a specific cpu type (-mcpu=help for details)"),
cl::value_desc("cpu-name"), cl::cat(ToolOptions), cl::init("native"));
static cl::list<std::string>
MATTRS("mattr", cl::CommaSeparated,
cl::desc("Target specific attributes (-mattr=help for details)"),
cl::value_desc("a1,+a2,-a3,..."), cl::cat(ToolOptions));
static cl::opt<bool> PrintJson("json",
cl::desc("Print the output in json format"),
cl::cat(ToolOptions), cl::init(false));
static cl::opt<int>
OutputAsmVariant("output-asm-variant",
cl::desc("Syntax variant to use for output printing"),
cl::cat(ToolOptions), cl::init(-1));
static cl::opt<bool>
PrintImmHex("print-imm-hex", cl::cat(ToolOptions), cl::init(false),
cl::desc("Prefer hex format when printing immediate values"));
static cl::opt<unsigned> Iterations("iterations",
cl::desc("Number of iterations to run"),
cl::cat(ToolOptions), cl::init(0));
static cl::opt<unsigned>
DispatchWidth("dispatch", cl::desc("Override the processor dispatch width"),
cl::cat(ToolOptions), cl::init(0));
static cl::opt<unsigned>
RegisterFileSize("register-file-size",
cl::desc("Maximum number of physical registers which can "
"be used for register mappings"),
cl::cat(ToolOptions), cl::init(0));
static cl::opt<unsigned>
MicroOpQueue("micro-op-queue-size", cl::Hidden,
cl::desc("Number of entries in the micro-op queue"),
cl::cat(ToolOptions), cl::init(0));
static cl::opt<unsigned>
DecoderThroughput("decoder-throughput", cl::Hidden,
cl::desc("Maximum throughput from the decoders "
"(instructions per cycle)"),
cl::cat(ToolOptions), cl::init(0));
static cl::opt<bool>
PrintRegisterFileStats("register-file-stats",
cl::desc("Print register file statistics"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool> PrintDispatchStats("dispatch-stats",
cl::desc("Print dispatch statistics"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool>
PrintSummaryView("summary-view", cl::Hidden,
cl::desc("Print summary view (enabled by default)"),
cl::cat(ViewOptions), cl::init(true));
static cl::opt<bool> PrintSchedulerStats("scheduler-stats",
cl::desc("Print scheduler statistics"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool>
PrintRetireStats("retire-stats",
cl::desc("Print retire control unit statistics"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool> PrintResourcePressureView(
"resource-pressure",
cl::desc("Print the resource pressure view (enabled by default)"),
cl::cat(ViewOptions), cl::init(true));
static cl::opt<bool> PrintTimelineView("timeline",
cl::desc("Print the timeline view"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<unsigned> TimelineMaxIterations(
"timeline-max-iterations",
cl::desc("Maximum number of iterations to print in timeline view"),
cl::cat(ViewOptions), cl::init(0));
static cl::opt<unsigned>
TimelineMaxCycles("timeline-max-cycles",
cl::desc("Maximum number of cycles in the timeline view, "
"or 0 for unlimited. Defaults to 80 cycles"),
cl::cat(ViewOptions), cl::init(80));
static cl::opt<bool>
AssumeNoAlias("noalias",
cl::desc("If set, assume that loads and stores do not alias"),
cl::cat(ToolOptions), cl::init(true));
static cl::opt<unsigned> LoadQueueSize("lqueue",
cl::desc("Size of the load queue"),
cl::cat(ToolOptions), cl::init(0));
static cl::opt<unsigned> StoreQueueSize("squeue",
cl::desc("Size of the store queue"),
cl::cat(ToolOptions), cl::init(0));
static cl::opt<bool>
PrintInstructionTables("instruction-tables",
cl::desc("Print instruction tables"),
cl::cat(ToolOptions), cl::init(false));
static cl::opt<bool> PrintInstructionInfoView(
"instruction-info",
cl::desc("Print the instruction info view (enabled by default)"),
cl::cat(ViewOptions), cl::init(true));
static cl::opt<bool> EnableAllStats("all-stats",
cl::desc("Print all hardware statistics"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool>
EnableAllViews("all-views",
cl::desc("Print all views including hardware statistics"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool> EnableBottleneckAnalysis(
"bottleneck-analysis",
cl::desc("Enable bottleneck analysis (disabled by default)"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool> ShowEncoding(
"show-encoding",
cl::desc("Print encoding information in the instruction info view"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool> ShowBarriers(
"show-barriers",
cl::desc("Print memory barrier information in the instruction info view"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool> DisableCustomBehaviour(
"disable-cb",
cl::desc(
"Disable custom behaviour (use the default class which does nothing)."),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool> DisableInstrumentManager(
"disable-im",
cl::desc("Disable instrumentation manager (use the default class which "
"ignores instruments)."),
cl::cat(ViewOptions), cl::init(false));
namespace {
const Target *getTarget(const char *ProgName) {
if (TripleName.empty())
TripleName = Triple::normalize(sys::getDefaultTargetTriple());
Triple TheTriple(TripleName);
// Get the target specific parser.
std::string Error;
const Target *TheTarget =
TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
if (!TheTarget) {
errs() << ProgName << ": " << Error;
return nullptr;
}
// Update TripleName with the updated triple from the target lookup.
TripleName = TheTriple.str();
// Return the found target.
return TheTarget;
}
ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
if (OutputFilename == "")
OutputFilename = "-";
std::error_code EC;
auto Out = std::make_unique<ToolOutputFile>(OutputFilename, EC,
sys::fs::OF_TextWithCRLF);
if (!EC)
return std::move(Out);
return EC;
}
} // end of anonymous namespace
static void processOptionImpl(cl::opt<bool> &O, const cl::opt<bool> &Default) {
if (!O.getNumOccurrences() || O.getPosition() < Default.getPosition())
O = Default.getValue();
}
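// Editorial example (not upstream) of the positional precedence that
// processOptionImpl() gives to -all-views / -all-stats in processViewOptions()
// below:
//   llvm-mca -all-views -dispatch-stats=false ...
//     keeps dispatch statistics disabled, because the explicit flag appears
//     after -all-views on the command line;
//   llvm-mca -dispatch-stats=false -all-views ...
//     re-enables dispatch statistics, because the explicit flag occurs before
//     -all-views and is therefore overridden with the -all-views value.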
static void processViewOptions(bool IsOutOfOrder) {
if (!EnableAllViews.getNumOccurrences() &&
!EnableAllStats.getNumOccurrences())
return;
if (EnableAllViews.getNumOccurrences()) {
processOptionImpl(PrintSummaryView, EnableAllViews);
if (IsOutOfOrder)
processOptionImpl(EnableBottleneckAnalysis, EnableAllViews);
processOptionImpl(PrintResourcePressureView, EnableAllViews);
processOptionImpl(PrintTimelineView, EnableAllViews);
processOptionImpl(PrintInstructionInfoView, EnableAllViews);
}
const cl::opt<bool> &Default =
EnableAllViews.getPosition() < EnableAllStats.getPosition()
? EnableAllStats
: EnableAllViews;
processOptionImpl(PrintRegisterFileStats, Default);
processOptionImpl(PrintDispatchStats, Default);
processOptionImpl(PrintSchedulerStats, Default);
if (IsOutOfOrder)
processOptionImpl(PrintRetireStats, Default);
}
// Returns true on success.
static bool runPipeline(mca::Pipeline &P) {
// Handle pipeline errors here.
Expected<unsigned> Cycles = P.run();
if (!Cycles) {
WithColor::error() << toString(Cycles.takeError());
return false;
}
return true;
}
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
// Initialize targets and assembly parsers.
InitializeAllTargetInfos();
InitializeAllTargetMCs();
InitializeAllAsmParsers();
InitializeAllTargetMCAs();
// Register the Target and CPU printer for --version.
cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU);
// Enable printing of available targets when flag --version is specified.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
cl::HideUnrelatedOptions({&ToolOptions, &ViewOptions});
// Parse flags and initialize target options.
cl::ParseCommandLineOptions(argc, argv,
"llvm machine code performance analyzer.\n");
// Get the target from the triple. If a triple is not specified, then select
// the default triple for the host. If the triple doesn't correspond to any
// registered target, then exit with an error message.
const char *ProgName = argv[0];
const Target *TheTarget = getTarget(ProgName);
if (!TheTarget)
return 1;
// getTarget() may have replaced TripleName with a default triple.
// For safety, reconstruct the Triple object.
Triple TheTriple(TripleName);
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr =
MemoryBuffer::getFileOrSTDIN(InputFilename);
if (std::error_code EC = BufferPtr.getError()) {
WithColor::error() << InputFilename << ": " << EC.message() << '\n';
return 1;
}
if (MCPU == "native")
MCPU = std::string(llvm::sys::getHostCPUName());
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
if (MATTRS.size()) {
SubtargetFeatures Features;
for (std::string &MAttr : MATTRS)
Features.AddFeature(MAttr);
FeaturesStr = Features.getString();
}
std::unique_ptr<MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
assert(STI && "Unable to create subtarget info!");
if (!STI->isCPUStringValid(MCPU))
return 1;
if (!STI->getSchedModel().hasInstrSchedModel()) {
WithColor::error()
<< "unable to find instruction-level scheduling information for"
<< " target triple '" << TheTriple.normalize() << "' and cpu '" << MCPU
<< "'.\n";
if (STI->getSchedModel().InstrItineraries)
WithColor::note()
<< "cpu '" << MCPU << "' provides itineraries. However, "
<< "instruction itineraries are currently unsupported.\n";
return 1;
}
// Apply overrides to llvm-mca specific options.
bool IsOutOfOrder = STI->getSchedModel().isOutOfOrder();
processViewOptions(IsOutOfOrder);
std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
assert(MRI && "Unable to create target register info!");
MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
std::unique_ptr<MCAsmInfo> MAI(
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
assert(MAI && "Unable to create target asm info!");
SourceMgr SrcMgr;
// Tell SrcMgr about this buffer, which is what the parser will pick up.
SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc());
- MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
- std::unique_ptr<MCObjectFileInfo> MOFI(
- TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
- Ctx.setObjectFileInfo(MOFI.get());
-
std::unique_ptr<buffer_ostream> BOS;
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
assert(MCII && "Unable to create instruction info!");
std::unique_ptr<MCInstrAnalysis> MCIA(
TheTarget->createMCInstrAnalysis(MCII.get()));
// Need to initialize an MCInstPrinter as it is
// required for initializing the MCTargetStreamer
// which needs to happen within the CRG.parseAnalysisRegions() call below.
// Without an MCTargetStreamer, certain assembly directives can trigger a
// segfault. (For example, the .cv_fpo_proc directive on x86 will segfault if
// we don't initialize the MCTargetStreamer.)
unsigned IPtempOutputAsmVariant =
OutputAsmVariant == -1 ? 0 : OutputAsmVariant;
std::unique_ptr<MCInstPrinter> IPtemp(TheTarget->createMCInstPrinter(
Triple(TripleName), IPtempOutputAsmVariant, *MAI, *MCII, *MRI));
if (!IPtemp) {
WithColor::error()
<< "unable to create instruction printer for target triple '"
<< TheTriple.normalize() << "' with assembly variant "
<< IPtempOutputAsmVariant << ".\n";
return 1;
}
// Parse the input and create CodeRegions that llvm-mca can analyze.
- mca::AsmAnalysisRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI,
+ MCContext ACtx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
+ std::unique_ptr<MCObjectFileInfo> AMOFI(
+ TheTarget->createMCObjectFileInfo(ACtx, /*PIC=*/false));
+ ACtx.setObjectFileInfo(AMOFI.get());
+ mca::AsmAnalysisRegionGenerator CRG(*TheTarget, SrcMgr, ACtx, *MAI, *STI,
*MCII);
Expected<const mca::AnalysisRegions &> RegionsOrErr =
CRG.parseAnalysisRegions(std::move(IPtemp));
if (!RegionsOrErr) {
if (auto Err =
handleErrors(RegionsOrErr.takeError(), [](const StringError &E) {
WithColor::error() << E.getMessage() << '\n';
})) {
// Default case.
WithColor::error() << toString(std::move(Err)) << '\n';
}
return 1;
}
const mca::AnalysisRegions &Regions = *RegionsOrErr;
// Early exit if errors were found by the code region parsing logic.
if (!Regions.isValid())
return 1;
if (Regions.empty()) {
WithColor::error() << "no assembly instructions found.\n";
return 1;
}
std::unique_ptr<mca::InstrumentManager> IM;
if (!DisableInstrumentManager) {
IM = std::unique_ptr<mca::InstrumentManager>(
TheTarget->createInstrumentManager(*STI, *MCII));
}
if (!IM) {
// If the target doesn't have its own IM implemented (or the -disable-im
// flag is set) then we use the base class (which does nothing).
IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII);
}
// Parse the input and create InstrumentRegion that llvm-mca
// can use to improve analysis.
- mca::AsmInstrumentRegionGenerator IRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI,
+ MCContext ICtx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
+ std::unique_ptr<MCObjectFileInfo> IMOFI(
+ TheTarget->createMCObjectFileInfo(ICtx, /*PIC=*/false));
+ ICtx.setObjectFileInfo(IMOFI.get());
+ mca::AsmInstrumentRegionGenerator IRG(*TheTarget, SrcMgr, ICtx, *MAI, *STI,
*MCII, *IM);
Expected<const mca::InstrumentRegions &> InstrumentRegionsOrErr =
IRG.parseInstrumentRegions(std::move(IPtemp));
if (!InstrumentRegionsOrErr) {
if (auto Err = handleErrors(InstrumentRegionsOrErr.takeError(),
[](const StringError &E) {
WithColor::error() << E.getMessage() << '\n';
})) {
// Default case.
WithColor::error() << toString(std::move(Err)) << '\n';
}
return 1;
}
const mca::InstrumentRegions &InstrumentRegions = *InstrumentRegionsOrErr;
// Early exit if errors were found by the instrumentation parsing logic.
if (!InstrumentRegions.isValid())
return 1;
// Now initialize the output file.
auto OF = getOutputStream();
if (std::error_code EC = OF.getError()) {
WithColor::error() << EC.message() << '\n';
return 1;
}
unsigned AssemblerDialect = CRG.getAssemblerDialect();
if (OutputAsmVariant >= 0)
AssemblerDialect = static_cast<unsigned>(OutputAsmVariant);
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AssemblerDialect, *MAI, *MCII, *MRI));
if (!IP) {
WithColor::error()
<< "unable to create instruction printer for target triple '"
<< TheTriple.normalize() << "' with assembly variant "
<< AssemblerDialect << ".\n";
return 1;
}
// Set the display preference for hex vs. decimal immediates.
IP->setPrintImmHex(PrintImmHex);
std::unique_ptr<ToolOutputFile> TOF = std::move(*OF);
const MCSchedModel &SM = STI->getSchedModel();
std::unique_ptr<mca::InstrPostProcess> IPP;
if (!DisableCustomBehaviour) {
// TODO: It may be a good idea to separate CB and IPP so that they can
// be used independently of each other. What I mean by this is to add
// an extra command-line arg --disable-ipp so that CB and IPP can be
// toggled without needing to toggle both of them together.
IPP = std::unique_ptr<mca::InstrPostProcess>(
TheTarget->createInstrPostProcess(*STI, *MCII));
}
if (!IPP) {
// If the target doesn't have its own IPP implemented (or the -disable-cb
// flag is set) then we use the base class (which does nothing).
IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
}
// Create an instruction builder.
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
// Create a context to control ownership of the pipeline hardware.
mca::Context MCA(*MRI, *STI);
mca::PipelineOptions PO(MicroOpQueue, DecoderThroughput, DispatchWidth,
RegisterFileSize, LoadQueueSize, StoreQueueSize,
AssumeNoAlias, EnableBottleneckAnalysis);
// Number each region in the sequence.
unsigned RegionIdx = 0;
std::unique_ptr<MCCodeEmitter> MCE(
- TheTarget->createMCCodeEmitter(*MCII, Ctx));
+ TheTarget->createMCCodeEmitter(*MCII, ACtx));
assert(MCE && "Unable to create code emitter!");
std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend(
*STI, *MRI, mc::InitMCTargetOptionsFromFlags()));
assert(MAB && "Unable to create asm backend!");
json::Object JSONOutput;
for (const std::unique_ptr<mca::AnalysisRegion> &Region : Regions) {
// Skip empty code regions.
if (Region->empty())
continue;
IB.clear();
// Lower the MCInst sequence into an mca::Instruction sequence.
ArrayRef<MCInst> Insts = Region->getInstructions();
mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts);
IPP->resetState();
SmallVector<std::unique_ptr<mca::Instruction>> LoweredSequence;
for (const MCInst &MCI : Insts) {
SMLoc Loc = MCI.getLoc();
const SmallVector<mca::SharedInstrument> Instruments =
InstrumentRegions.getActiveInstruments(Loc);
Expected<std::unique_ptr<mca::Instruction>> Inst =
IB.createInstruction(MCI, Instruments);
if (!Inst) {
if (auto NewE = handleErrors(
Inst.takeError(),
[&IP, &STI](const mca::InstructionError<MCInst> &IE) {
std::string InstructionStr;
raw_string_ostream SS(InstructionStr);
WithColor::error() << IE.Message << '\n';
IP->printInst(&IE.Inst, 0, "", *STI, SS);
SS.flush();
WithColor::note()
<< "instruction: " << InstructionStr << '\n';
})) {
// Default case.
WithColor::error() << toString(std::move(NewE));
}
return 1;
}
IPP->postProcessInstruction(Inst.get(), MCI);
LoweredSequence.emplace_back(std::move(Inst.get()));
}
mca::CircularSourceMgr S(LoweredSequence,
PrintInstructionTables ? 1 : Iterations);
if (PrintInstructionTables) {
// Create a pipeline, stages, and a printer.
auto P = std::make_unique<mca::Pipeline>();
P->appendStage(std::make_unique<mca::EntryStage>(S));
P->appendStage(std::make_unique<mca::InstructionTables>(SM));
mca::PipelinePrinter Printer(*P, *Region, RegionIdx, *STI, PO);
if (PrintJson) {
Printer.addView(
std::make_unique<mca::InstructionView>(*STI, *IP, Insts));
}
// Create the views for this pipeline, execute, and emit a report.
if (PrintInstructionInfoView) {
Printer.addView(std::make_unique<mca::InstructionInfoView>(
*STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
ShowBarriers));
}
Printer.addView(
std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
if (!runPipeline(*P))
return 1;
if (PrintJson) {
Printer.printReport(JSONOutput);
} else {
Printer.printReport(TOF->os());
}
++RegionIdx;
continue;
}
// Create the CustomBehaviour object for enforcing Target Specific
// behaviours and dependencies that aren't expressed well enough
// in the tablegen. CB cannot depend on the list of MCInst or
// the source code (but it can depend on the list of
// mca::Instruction or any objects that can be reconstructed
// from the target information).
std::unique_ptr<mca::CustomBehaviour> CB;
if (!DisableCustomBehaviour)
CB = std::unique_ptr<mca::CustomBehaviour>(
TheTarget->createCustomBehaviour(*STI, S, *MCII));
if (!CB)
// If the target doesn't have its own CB implemented (or the -disable-cb
// flag is set) then we use the base class (which does nothing).
CB = std::make_unique<mca::CustomBehaviour>(*STI, S, *MCII);
// Create a basic pipeline simulating an out-of-order backend.
auto P = MCA.createDefaultPipeline(PO, S, *CB);
mca::PipelinePrinter Printer(*P, *Region, RegionIdx, *STI, PO);
// Targets can define their own custom Views that exist within their
// /lib/Target/ directory so that the View can utilize their CustomBehaviour
// or other backend symbols / functionality that are not already exposed
// through one of the MC-layer classes. These Views will be initialized
// using the CustomBehaviour::getViews() variants.
// If a target makes a custom View that does not depend on their target
// CB or their backend, they should put the View within
// /tools/llvm-mca/Views/ instead.
if (!DisableCustomBehaviour) {
std::vector<std::unique_ptr<mca::View>> CBViews =
CB->getStartViews(*IP, Insts);
for (auto &CBView : CBViews)
Printer.addView(std::move(CBView));
}
// When we output JSON, we add a view that contains the instructions
// and CPU resource information.
if (PrintJson) {
auto IV = std::make_unique<mca::InstructionView>(*STI, *IP, Insts);
Printer.addView(std::move(IV));
}
if (PrintSummaryView)
Printer.addView(
std::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
if (EnableBottleneckAnalysis) {
if (!IsOutOfOrder) {
WithColor::warning()
<< "bottleneck analysis is not supported for in-order CPU '" << MCPU
<< "'.\n";
}
Printer.addView(std::make_unique<mca::BottleneckAnalysis>(
*STI, *IP, Insts, S.getNumIterations()));
}
if (PrintInstructionInfoView)
Printer.addView(std::make_unique<mca::InstructionInfoView>(
*STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
ShowBarriers));
// Fetch custom Views that are to be placed after the InstructionInfoView.
// Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
// for more info.
if (!DisableCustomBehaviour) {
std::vector<std::unique_ptr<mca::View>> CBViews =
CB->getPostInstrInfoViews(*IP, Insts);
for (auto &CBView : CBViews)
Printer.addView(std::move(CBView));
}
if (PrintDispatchStats)
Printer.addView(std::make_unique<mca::DispatchStatistics>());
if (PrintSchedulerStats)
Printer.addView(std::make_unique<mca::SchedulerStatistics>(*STI));
if (PrintRetireStats)
Printer.addView(std::make_unique<mca::RetireControlUnitStatistics>(SM));
if (PrintRegisterFileStats)
Printer.addView(std::make_unique<mca::RegisterFileStatistics>(*STI));
if (PrintResourcePressureView)
Printer.addView(
std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
if (PrintTimelineView) {
unsigned TimelineIterations =
TimelineMaxIterations ? TimelineMaxIterations : 10;
Printer.addView(std::make_unique<mca::TimelineView>(
*STI, *IP, Insts, std::min(TimelineIterations, S.getNumIterations()),
TimelineMaxCycles));
}
// Fetch custom Views that are to be placed after all other Views.
// Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
// for more info.
if (!DisableCustomBehaviour) {
std::vector<std::unique_ptr<mca::View>> CBViews =
CB->getEndViews(*IP, Insts);
for (auto &CBView : CBViews)
Printer.addView(std::move(CBView));
}
if (!runPipeline(*P))
return 1;
if (PrintJson) {
Printer.printReport(JSONOutput);
} else {
Printer.printReport(TOF->os());
}
++RegionIdx;
}
if (PrintJson)
TOF->os() << formatv("{0:2}", json::Value(std::move(JSONOutput))) << "\n";
TOF->keep();
return 0;
}
diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc
index 514717a358fc..1daa9c25ab45 100644
--- a/lib/clang/include/VCSVersion.inc
+++ b/lib/clang/include/VCSVersion.inc
@@ -1,10 +1,10 @@
// $FreeBSD$
-#define LLVM_REVISION "llvmorg-16.0.4-0-gae42196bc493"
+#define LLVM_REVISION "llvmorg-16.0.5-0-g185b81e034ba"
#define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
-#define CLANG_REVISION "llvmorg-16.0.4-0-gae42196bc493"
+#define CLANG_REVISION "llvmorg-16.0.5-0-g185b81e034ba"
#define CLANG_REPOSITORY "https://github.com/llvm/llvm-project.git"
-#define LLDB_REVISION "llvmorg-16.0.4-0-gae42196bc493"
+#define LLDB_REVISION "llvmorg-16.0.5-0-g185b81e034ba"
#define LLDB_REPOSITORY "https://github.com/llvm/llvm-project.git"
diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc
index 301f49bfd08c..e99bc8fb4f95 100644
--- a/lib/clang/include/clang/Basic/Version.inc
+++ b/lib/clang/include/clang/Basic/Version.inc
@@ -1,10 +1,10 @@
/* $FreeBSD$ */
-#define CLANG_VERSION 16.0.4
-#define CLANG_VERSION_STRING "16.0.4"
+#define CLANG_VERSION 16.0.5
+#define CLANG_VERSION_STRING "16.0.5"
#define CLANG_VERSION_MAJOR 16
#define CLANG_VERSION_MAJOR_STRING "16"
#define CLANG_VERSION_MINOR 0
-#define CLANG_VERSION_PATCHLEVEL 4
+#define CLANG_VERSION_PATCHLEVEL 5
#define CLANG_VENDOR "FreeBSD "
diff --git a/lib/clang/include/lld/Common/Version.inc b/lib/clang/include/lld/Common/Version.inc
index 61598755e168..f6061c3e6784 100644
--- a/lib/clang/include/lld/Common/Version.inc
+++ b/lib/clang/include/lld/Common/Version.inc
@@ -1,4 +1,4 @@
// Local identifier in __FreeBSD_version style
#define LLD_FREEBSD_VERSION 1400006
-#define LLD_VERSION_STRING "16.0.4 (FreeBSD llvmorg-16.0.4-0-gae42196bc493-" __XSTRING(LLD_FREEBSD_VERSION) ")"
+#define LLD_VERSION_STRING "16.0.5 (FreeBSD llvmorg-16.0.5-0-g185b81e034ba-" __XSTRING(LLD_FREEBSD_VERSION) ")"
diff --git a/lib/clang/include/lldb/Version/Version.inc b/lib/clang/include/lldb/Version/Version.inc
index cdc1a2c63f72..982e4714b182 100644
--- a/lib/clang/include/lldb/Version/Version.inc
+++ b/lib/clang/include/lldb/Version/Version.inc
@@ -1,6 +1,6 @@
-#define LLDB_VERSION 16.0.4
-#define LLDB_VERSION_STRING "16.0.4"
+#define LLDB_VERSION 16.0.5
+#define LLDB_VERSION_STRING "16.0.5"
#define LLDB_VERSION_MAJOR 16
#define LLDB_VERSION_MINOR 0
-#define LLDB_VERSION_PATCH 4
+#define LLDB_VERSION_PATCH 5
/* #undef LLDB_FULL_VERSION_STRING */
diff --git a/lib/clang/include/llvm/Config/config.h b/lib/clang/include/llvm/Config/config.h
index 105378ec0dcd..d97252910e45 100644
--- a/lib/clang/include/llvm/Config/config.h
+++ b/lib/clang/include/llvm/Config/config.h
@@ -1,380 +1,380 @@
/* $FreeBSD$ */
#ifndef CONFIG_H
#define CONFIG_H
// Include this header only under the llvm source tree.
// This is a private header.
/* Exported configuration */
#include "llvm/Config/llvm-config.h"
/* Bug report URL. */
#define BUG_REPORT_URL "https://bugs.freebsd.org/submit/"
/* Define to 1 to enable backtraces, and to 0 otherwise. */
#define ENABLE_BACKTRACES 1
/* Define to 1 to enable crash overrides, and to 0 otherwise. */
#define ENABLE_CRASH_OVERRIDES 1
/* Define to 1 to enable crash memory dumps, and to 0 otherwise. */
#define LLVM_ENABLE_CRASH_DUMPS 0
/* Define to 1 to prefer forward slashes on Windows, and to 0 to prefer
backslashes. */
#define LLVM_WINDOWS_PREFER_FORWARD_SLASH 0
/* Define to 1 if you have the `backtrace' function. */
#define HAVE_BACKTRACE TRUE
#define BACKTRACE_HEADER <execinfo.h>
/* Define to 1 if you have the <CrashReporterClient.h> header file. */
/* #undef HAVE_CRASHREPORTERCLIENT_H */
/* can use __crashreporter_info__ */
#if defined(__APPLE__)
#define HAVE_CRASHREPORTER_INFO 1
#else
#define HAVE_CRASHREPORTER_INFO 0
#endif
/* Define to 1 if you have the declaration of `arc4random', and to 0 if you
don't. */
#define HAVE_DECL_ARC4RANDOM 1
/* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you
don't. */
#define HAVE_DECL_FE_ALL_EXCEPT 1
/* Define to 1 if you have the declaration of `FE_INEXACT', and to 0 if you
don't. */
#define HAVE_DECL_FE_INEXACT 1
/* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
don't. */
#define HAVE_DECL_STRERROR_S 0
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define if dlopen() is available on this platform. */
#define HAVE_DLOPEN 1
/* Define if dladdr() is available on this platform. */
#define HAVE_DLADDR 1
#if !defined(__arm__) || defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__)
/* Define to 1 if we can register EH frames on this platform. */
#define HAVE_REGISTER_FRAME 1
/* Define to 1 if we can deregister EH frames on this platform. */
#define HAVE_DEREGISTER_FRAME 1
#endif // !arm || USING_SJLJ_EXCEPTIONS || ARM_DWARF_EH_
/* Define if __unw_add_dynamic_fde() is available on this platform. */
/* #undef HAVE_UNW_ADD_DYNAMIC_FDE */
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H 1
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define to 1 if you have the <fenv.h> header file. */
#define HAVE_FENV_H 1
/* Define if libffi is available on this platform. */
/* #undef HAVE_FFI_CALL */
/* Define to 1 if you have the <ffi/ffi.h> header file. */
/* #undef HAVE_FFI_FFI_H */
/* Define to 1 if you have the <ffi.h> header file. */
/* #undef HAVE_FFI_H */
/* Define to 1 if you have the `futimens' function. */
#define HAVE_FUTIMENS 1
/* Define to 1 if you have the `futimes' function. */
#define HAVE_FUTIMES 1
/* Define to 1 if you have the `getpagesize' function. */
#define HAVE_GETPAGESIZE 1
/* Define to 1 if you have the `getrlimit' function. */
#define HAVE_GETRLIMIT 1
/* Define to 1 if you have the `getrusage' function. */
#define HAVE_GETRUSAGE 1
/* Define to 1 if you have the `isatty' function. */
#define HAVE_ISATTY 1
/* Define to 1 if you have the `edit' library (-ledit). */
#define HAVE_LIBEDIT TRUE
/* Define to 1 if you have the `pfm' library (-lpfm). */
/* #undef HAVE_LIBPFM */
/* Define to 1 if the `perf_branch_entry' struct has field cycles. */
/* #undef LIBPFM_HAS_FIELD_CYCLES */
/* Define to 1 if you have the `psapi' library (-lpsapi). */
/* #undef HAVE_LIBPSAPI */
/* Define to 1 if you have the `pthread' library (-lpthread). */
#define HAVE_LIBPTHREAD 1
/* Define to 1 if you have the `pthread_getname_np' function. */
#define HAVE_PTHREAD_GETNAME_NP 1
/* Define to 1 if you have the `pthread_setname_np' function. */
#define HAVE_PTHREAD_SETNAME_NP 1
/* Define to 1 if you have the <link.h> header file. */
#if __has_include(<link.h>)
#define HAVE_LINK_H 1
#else
#define HAVE_LINK_H 0
#endif
/* Define to 1 if you have the <mach/mach.h> header file. */
#if __has_include(<mach/mach.h>)
#define HAVE_MACH_MACH_H 1
#endif
/* Define to 1 if you have the `mallctl' function. */
#if defined(__FreeBSD__)
#define HAVE_MALLCTL 1
#endif
/* Define to 1 if you have the `mallinfo' function. */
#if defined(__linux__)
#define HAVE_MALLINFO 1
#endif
/* Define to 1 if you have the `mallinfo2' function. */
/* #undef HAVE_MALLINFO2 */
/* Define to 1 if you have the <malloc/malloc.h> header file. */
#if __has_include(<malloc/malloc.h>)
#define HAVE_MALLOC_MALLOC_H 1
#endif
/* Define to 1 if you have the `malloc_zone_statistics' function. */
#if defined(__APPLE__)
#define HAVE_MALLOC_ZONE_STATISTICS 1
#endif
/* Define to 1 if you have the `posix_spawn' function. */
#define HAVE_POSIX_SPAWN 1
/* Define to 1 if you have the `pread' function. */
#define HAVE_PREAD 1
/* Define to 1 if you have the <pthread.h> header file. */
#define HAVE_PTHREAD_H 1
/* Have pthread_mutex_lock */
#define HAVE_PTHREAD_MUTEX_LOCK 1
/* Have pthread_rwlock_init */
#define HAVE_PTHREAD_RWLOCK_INIT 1
/* Define to 1 if you have the `sbrk' function. */
#define HAVE_SBRK 1
/* Define to 1 if you have the `setenv' function. */
#define HAVE_SETENV 1
/* Define to 1 if you have the `setrlimit' function. */
#define HAVE_SETRLIMIT 1
/* Define to 1 if you have the `sigaltstack' function. */
#define HAVE_SIGALTSTACK 1
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H 1
/* Define to 1 if you have the `strerror' function. */
#define HAVE_STRERROR 1
/* Define to 1 if you have the `strerror_r' function. */
#define HAVE_STRERROR_R 1
/* Define to 1 if you have the `sysconf' function. */
#define HAVE_SYSCONF 1
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H 1
/* Define to 1 if you have the <sys/mman.h> header file. */
#define HAVE_SYS_MMAN_H 1
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H 1
/* Define to 1 if you have the <sys/resource.h> header file. */
#define HAVE_SYS_RESOURCE_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
/* Define to 1 if stat struct has st_mtimespec member. */
#if !defined(__linux__)
#define HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC 1
#endif
/* Define to 1 if stat struct has st_mtim member. */
#if !defined(__APPLE__)
#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1
#endif
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define if the setupterm() function is supported on this platform. */
#if defined(__FreeBSD__)
/*
* This is only needed for terminalHasColors(). When disabled LLVM falls back
* to checking a list of TERM prefixes which is sufficient for a bootstrap tool.
*/
#define LLVM_ENABLE_TERMINFO TRUE
#endif
/* Define to 1 if you have the <termios.h> header file. */
#define HAVE_TERMIOS_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
/* #undef HAVE_VALGRIND_VALGRIND_H */
/* Have host's _alloca */
/* #undef HAVE__ALLOCA */
/* Define to 1 if you have the `_chsize_s' function. */
/* #undef HAVE__CHSIZE_S */
/* Define to 1 if you have the `_Unwind_Backtrace' function. */
#define HAVE__UNWIND_BACKTRACE 1
/* Have host's __alloca */
/* #undef HAVE___ALLOCA */
/* Have host's __ashldi3 */
/* #undef HAVE___ASHLDI3 */
/* Have host's __ashrdi3 */
/* #undef HAVE___ASHRDI3 */
/* Have host's __chkstk */
/* #undef HAVE___CHKSTK */
/* Have host's __chkstk_ms */
/* #undef HAVE___CHKSTK_MS */
/* Have host's __cmpdi2 */
/* #undef HAVE___CMPDI2 */
/* Have host's __divdi3 */
/* #undef HAVE___DIVDI3 */
/* Have host's __fixdfdi */
/* #undef HAVE___FIXDFDI */
/* Have host's __fixsfdi */
/* #undef HAVE___FIXSFDI */
/* Have host's __floatdidf */
/* #undef HAVE___FLOATDIDF */
/* Have host's __lshrdi3 */
/* #undef HAVE___LSHRDI3 */
/* Have host's __main */
/* #undef HAVE___MAIN */
/* Have host's __moddi3 */
/* #undef HAVE___MODDI3 */
/* Have host's __udivdi3 */
/* #undef HAVE___UDIVDI3 */
/* Have host's __umoddi3 */
/* #undef HAVE___UMODDI3 */
/* Have host's ___chkstk */
/* #undef HAVE____CHKSTK */
/* Have host's ___chkstk_ms */
/* #undef HAVE____CHKSTK_MS */
/* Linker version detected at compile time. */
/* #undef HOST_LINK_VERSION */
/* Define if overriding target triple is enabled */
/* #undef LLVM_TARGET_TRIPLE_ENV */
/* Whether tools show host and target info when invoked with --version */
#define LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 1
/* Define if libxml2 is supported on this platform. */
/* #undef LLVM_ENABLE_LIBXML2 */
/* Define to the extension used for shared libraries, say, ".so". */
#if defined(__APPLE__)
#define LTDL_SHLIB_EXT ".dylib"
#else
#define LTDL_SHLIB_EXT ".so"
#endif
/* Define to the extension used for plugin libraries, say, ".so". */
#if defined(__APPLE__)
#define LLVM_PLUGIN_EXT ".dylib"
#else
#define LLVM_PLUGIN_EXT ".so"
#endif
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "https://bugs.freebsd.org/submit/"
/* Define to the full name of this package. */
#define PACKAGE_NAME "LLVM"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "LLVM 16.0.4"
+#define PACKAGE_STRING "LLVM 16.0.5"
/* Define to the version of this package. */
-#define PACKAGE_VERSION "16.0.4"
+#define PACKAGE_VERSION "16.0.5"
/* Define to the vendor of this package. */
/* #undef PACKAGE_VENDOR */
/* Define to a function implementing stricmp */
/* #undef stricmp */
/* Define to a function implementing strdup */
/* #undef strdup */
/* Whether GlobalISel rule coverage is being collected */
#define LLVM_GISEL_COV_ENABLED 0
/* Define to the default GlobalISel coverage file prefix */
/* #undef LLVM_GISEL_COV_PREFIX */
/* Whether Timers signpost passes in Xcode Instruments */
#if defined(__APPLE__)
#define LLVM_SUPPORT_XCODE_SIGNPOSTS 1
#else
#define LLVM_SUPPORT_XCODE_SIGNPOSTS 0
#endif
/* #undef HAVE_PROC_PID_RUSAGE */
#endif
diff --git a/lib/clang/include/llvm/Config/llvm-config.h b/lib/clang/include/llvm/Config/llvm-config.h
index 956e14142365..82d246c949f0 100644
--- a/lib/clang/include/llvm/Config/llvm-config.h
+++ b/lib/clang/include/llvm/Config/llvm-config.h
@@ -1,132 +1,132 @@
/* $FreeBSD$ */
/*===------- llvm/Config/llvm-config.h - llvm configuration -------*- C -*-===*/
/* */
/* Part of the LLVM Project, under the Apache License v2.0 with LLVM */
/* Exceptions. */
/* See https://llvm.org/LICENSE.txt for license information. */
/* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */
/* */
/*===----------------------------------------------------------------------===*/
/* This file enumerates variables from the LLVM configuration so that they
can be in exported headers and won't override package specific directives.
This is a C header that can be included in the llvm-c headers. */
#ifndef LLVM_CONFIG_H
#define LLVM_CONFIG_H
/* Define if LLVM_ENABLE_DUMP is enabled */
/* #undef LLVM_ENABLE_DUMP */
/* Target triple LLVM will generate code for by default */
/* Doesn't use `cmakedefine` because it is allowed to be empty. */
/* #undef LLVM_DEFAULT_TARGET_TRIPLE */
/* Define if threads enabled */
#define LLVM_ENABLE_THREADS 1
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 1
/* Host triple LLVM will be executed on */
/* #undef LLVM_HOST_TRIPLE */
/* LLVM architecture name for the native architecture, if available */
/* #undef LLVM_NATIVE_ARCH */
/* LLVM name for the native AsmParser init function, if available */
/* #undef LLVM_NATIVE_ASMPARSER */
/* LLVM name for the native AsmPrinter init function, if available */
/* #undef LLVM_NATIVE_ASMPRINTER */
/* LLVM name for the native Disassembler init function, if available */
/* #undef LLVM_NATIVE_DISASSEMBLER */
/* LLVM name for the native Target init function, if available */
/* #undef LLVM_NATIVE_TARGET */
/* LLVM name for the native TargetInfo init function, if available */
/* #undef LLVM_NATIVE_TARGETINFO */
/* LLVM name for the native target MC init function, if available */
/* #undef LLVM_NATIVE_TARGETMC */
/* LLVM name for the native target MCA init function, if available */
/* #undef LLVM_NATIVE_TARGETMCA */
/* Define if this is Unixish platform */
#define LLVM_ON_UNIX 1
/* Define if we have the Intel JIT API runtime support library */
#define LLVM_USE_INTEL_JITEVENTS 0
/* Define if we have the oprofile JIT-support library */
#define LLVM_USE_OPROFILE 0
/* Define if we have the perf JIT-support library */
#define LLVM_USE_PERF 0
/* Major version of the LLVM API */
#define LLVM_VERSION_MAJOR 16
/* Minor version of the LLVM API */
#define LLVM_VERSION_MINOR 0
/* Patch version of the LLVM API */
-#define LLVM_VERSION_PATCH 4
+#define LLVM_VERSION_PATCH 5
/* LLVM version string */
-#define LLVM_VERSION_STRING "16.0.4"
+#define LLVM_VERSION_STRING "16.0.5"
/* Whether LLVM records statistics for use with GetStatistics(),
* PrintStatistics() or PrintStatisticsJSON()
*/
#define LLVM_FORCE_ENABLE_STATS 0
/* Define if we have z3 and want to build it */
/* #undef LLVM_WITH_Z3 */
/* Define if we have curl and want to use it */
/* #undef LLVM_ENABLE_CURL */
/* Define if we have cpp-httplib and want to use it */
/* #undef LLVM_ENABLE_HTTPLIB */
/* Define if zlib compression is available */
#define LLVM_ENABLE_ZLIB 1
/* Define if zstd compression is available */
#define LLVM_ENABLE_ZSTD 1
/* Define if LLVM is using tflite instead of libtensorflow */
/* #undef LLVM_HAVE_TFLITE */
/* Define to 1 if you have the <sysexits.h> header file. */
#define HAVE_SYSEXITS_H 1
/* Define if the xar_open() function is supported on this platform. */
#if defined(__APPLE__)
#define LLVM_HAVE_LIBXAR 1
#endif
/* Define if building libLLVM shared library */
/* #undef LLVM_BUILD_LLVM_DYLIB */
/* Define if building LLVM with BUILD_SHARED_LIBS */
/* #undef LLVM_BUILD_SHARED_LIBS */
/* Define if building LLVM with LLVM_FORCE_USE_OLD_TOOLCHAIN_LIBS */
/* #undef LLVM_FORCE_USE_OLD_TOOLCHAIN */
/* Define if llvm_unreachable should be optimized with undefined behavior
* in non assert builds */
#define LLVM_UNREACHABLE_OPTIMIZE 1
/* Define to 1 if you have the DIA SDK installed, and to 0 if you don't. */
#define LLVM_ENABLE_DIA_SDK 0
/* Define if plugins enabled */
/* #undef LLVM_ENABLE_PLUGINS */
#endif
diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h
index ca9915b70f68..95353f30f7ec 100644
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,3 +1,3 @@
/* $FreeBSD$ */
-#define LLVM_REVISION "llvmorg-16.0.4-0-gae42196bc493"
+#define LLVM_REVISION "llvmorg-16.0.5-0-g185b81e034ba"
#define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
